<a href="https://colab.research.google.com/github/larissavaladao/time_series_curuai/blob/main/2.deglint_sampling/deglint_sample_py6s_field_points.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install and import packages

In [None]:
#import packages used
import ee
import pandas as pd
import geemap
import geopandas as gpd
import matplotlib.pyplot as plt
import json
import math
import geemap
import os

In [None]:
# Authenticate and initialize Google Earth Engine under the 'ee-curuai' project
# (geemap also needs an initialized Earth Engine session).
ee.Authenticate()
ee.Initialize(project = 'ee-curuai')

In [None]:
# Mount Google Drive: the input workbook and the exported CSV files live under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

# Import Curuai dataset

In [None]:
# Read the 'data' sheet of the field workbook, treating the text 'NaN' as missing.
# .loc[:383] is a label-based, end-inclusive slice: with the default integer index
# it keeps rows 0..383. NOTE(review): confirm why the sheet is cut at row 383.
dataset = pd.read_excel('/content/drive/MyDrive/CURUAI_PROCESS/Dataset_CFP.xlsx', sheet_name='data',na_values='NaN').loc[:383]
# dataset = pd.read_excel('Dataset_CFP.xlsx', sheet_name='data',na_values='NaN').loc[:383]
# Sequential integer ID per row; used later to link each sampled image back to its field point.
dataset['ID'] = range(len(dataset))

In [None]:
# Split the sampling date and the local time into their string components.
def _time_field(value, position):
    """Return the `position`-th ':'-separated field of a local-time value.

    Returns None when the value is missing. `pd.isna` is used instead of
    comparing `str(value)` with 'nan' so that None and NaT are also treated
    as missing rather than raising an IndexError on the split result.
    """
    if pd.isna(value):
        return None
    return str(value).split(':')[position]


# Date part of DATE: the text before the first space of its string form.
dataset['DATE_1'] = dataset['DATE'].apply(lambda value: str(value).split(' ')[0])
dataset['year'] = dataset['DATE_1'].apply(lambda value: str(value).split('-')[0])
dataset['month'] = dataset['DATE_1'].apply(lambda value: str(value).split('-')[1])
dataset['day'] = dataset['DATE_1'].apply(lambda value: str(value).split('-')[2])

# Time fields stay None for samples without a recorded local time.
dataset['Hour'] = dataset['Local Time'].apply(lambda value: _time_field(value, 0))
dataset['Minute'] = dataset['Local Time'].apply(lambda value: _time_field(value, 1))
dataset['Second'] = dataset['Local Time'].apply(lambda value: _time_field(value, -1))
dataset

In [None]:
from datetime import date, time


def _field_datetime(sample_date, year, month, day, hour, minute, second):
    """Combine the split date/time strings of one sample into a single timestamp.

    Returns `sample_date` unchanged when no local time was recorded (`hour` is
    missing); otherwise returns a pandas Timestamp built from the parts.
    """
    # pd.isna covers both None and NaN; `!= None` would let NaN through to int().
    if pd.isna(hour):
        return sample_date
    return pd.Timestamp.combine(
        date(int(year), int(month), int(day)),
        time(int(hour), int(minute), int(second)),
    )


# Walk the columns positionally (zip) instead of dataset[col][i], which is
# chained, label-based indexing and only works with a 0..n-1 integer index.
dates = [
    _field_datetime(*parts)
    for parts in zip(dataset['DATE'], dataset['year'], dataset['month'], dataset['day'],
                     dataset['Hour'], dataset['Minute'], dataset['Second'])
]

dataset['datetime'] = dates
dataset



In [None]:
# Work on an independent copy that holds only the identifier, the timestamp,
# the location fields and the water-quality variables of interest.
dataset_att = dataset.loc[:, [
    'ID', 'datetime', 'DEPTH CLASS', 'LATITUDE', 'LONGITUDE',
    'LOCATION', 'SAMPLE SITE', 'WATER PERIOD', 'MISSION',
    'TOTAL DEPTH', 'SAMPLING DEPTH',
    'TURBIDITY', 'CHLOROPHYLL', 'SPM',
    'TOC', 'POC', 'DOC', 'SILICA',
    'P TOTAL', 'P ORGANIC',
    'N TOTAL', 'N TOTAL DISSOLVED',
    'CHLOROPHYLL A', 'CHLOROPHYLL B',
]].copy()
dataset_att.columns

In [None]:
# Rename the columns positionally: same order as the selection above, with the
# spaces in the names replaced by underscores.
dataset_att.columns = ['ID','datetime', 'DEPTH_CLASS', 'LATITUDE',
       'LONGITUDE', 'LOCATION', 'SAMPLE_SITE', 'WATER_PERIOD', 'MISSION',
       'TOTAL_DEPTH', 'SAMPLING_DEPTH', 'TURBIDITY', 'CHLOROPHYLL', 'SPM',
       'TOC', 'POC', 'DOC', 'SILICA', 'P_TOTAL', 'P_ORGANIC', 'N_TOTAL',
       'N_TOTAL_DISSOLVED', 'CHLOROPHYLL_A', 'CHLOROPHYLL_B']
dataset_att.columns

In [None]:
# Number of samples (non-missing MISSION values) per field mission.
dataset_att.groupby(['MISSION'])['MISSION'].count()


In [None]:
# Turn the DataFrame into a GeoDataFrame: one point geometry per sample, built
# from LONGITUDE (x) and LATITUDE (y) and declared as EPSG:4326.
gdf = gpd.GeoDataFrame(
    dataset_att, geometry=gpd.points_from_xy(dataset_att.LONGITUDE, dataset_att.LATITUDE),
    crs="EPSG:4326"
)
gdf.head()

In [None]:
# Cast the timestamps to strings so the GeoDataFrame can be converted to JSON.
gdf['datetime'] = gdf['datetime'].astype("str")

In [None]:
# gdf.to_file("/content/drive/MyDrive/CURUAI_PROCESS/points_curuai.shp")

In [None]:
# Convert the GeoDataFrame to a JSON string so it can be loaded into Earth Engine.
dataset_json = gdf.to_json()

In [None]:
# Parse the JSON string and keep only its list of features.
data_points = json.loads(dataset_json)
data_points = data_points['features']
# data_points

In [None]:
# Build an Earth Engine FeatureCollection from the JSON features and print its size.
roi_points = ee.FeatureCollection(data_points)
print(roi_points.size().getInfo())

In [None]:
# Add the sampling date of a point as an Earth Engine date property.
def insert_date(feat):
    """Return `feat` with a `system:time_start` property.

    The value is the feature's `datetime` string parsed by `ee.Date.parse`
    with the pattern 'YYYY-MM-dd HH:mm:ss'.
    """
    parsed_date = ee.Date.parse('YYYY-MM-dd HH:mm:ss', feat.get('datetime'))
    return feat.set('system:time_start', parsed_date)

In [None]:
# Apply insert_date to every point (adds system:time_start) and print the first feature as a check.
roi_points = roi_points.map(insert_date)
print(roi_points.first().getInfo())

In [None]:
# Region of interest around the floodplain: bounding box of all field points
# after buffering them by 150 (Earth Engine buffer distances default to meters).
roi_poly = roi_points.geometry().buffer(150).bounds()

# Import GEE images

In [None]:
# Offset, in days, applied to the first and last field dates when building the image date filter.
advance = 17

In [None]:
# Date range used to filter the image collections, derived from the field
# points: from `advance` days BEFORE the earliest sampling date to `advance`
# days AFTER the latest one. The end date must be shifted forward; shifting it
# backward would exclude every image acquired around the most recent campaign.
initial_date = ee.Date(roi_points.sort('system:time_start').first().get('system:time_start')).advance(-advance, 'day')
end_date = ee.Date(roi_points.sort('system:time_start',False).first().get('system:time_start')).advance(advance, 'day')

print('Data inicial de coleta: ',initial_date.format().getInfo())
print('Data final de coleta: ',end_date.format().getInfo())

## Import image collections

### Landsat 7 - Py6S

In [None]:
# Landsat 7 images stored in the project's Py6S asset collection.
# Keep the images dated inside [initial_date, end_date) and the six bands used
# below, then print the date of the first image in the collection and the date
# of the most recent one.
landsat7 = (
    ee.ImageCollection("projects/ee-curuai/assets/Py6S/LD7/Landsat7")
    .filterDate(initial_date, end_date)
    .select(['B1', 'B2', 'B3', 'B4', 'B5', 'B7'])
)
print(ee.Date(landsat7.first().get('system:time_start')).format().getInfo())
print(ee.Date(landsat7.sort('system:time_start', False).first().get('system:time_start')).format().getInfo())

In [None]:
# Quick checks on the Landsat 7 collection: size, projection and nominal scale of band B4, band names.
print('collection size',landsat7.size().getInfo())
print('projection',landsat7.first().select('B4').projection().getInfo())
print('spatial resolution',landsat7.first().select('B4').projection().nominalScale().getInfo())
print('bands',landsat7.first().bandNames().getInfo())


### Landsat 8 - Py6S

In [None]:
# Landsat 8 images stored in the project's Py6S asset collection.
# Keep the images dated inside [initial_date, end_date) and the six bands used
# below, then print the date of the first image in the collection and the date
# of the most recent one.
landsat8 = (ee.ImageCollection("projects/ee-curuai/assets/Py6S/LD8/Landsat8")
            .filterDate(initial_date,end_date)
            .select(['B2', 'B3', 'B4', 'B5', 'B6', 'B7']))
print(ee.Date(landsat8.first().get('system:time_start')).format().getInfo())
print(ee.Date(landsat8.sort('system:time_start',False).first().get('system:time_start')).format().getInfo())

In [None]:
# Quick checks on the Landsat 8 collection: size, projection and nominal scale of band B4, band names.
print('collection size',landsat8.size().getInfo())
print('projection',landsat8.first().select('B4').projection().getInfo())
print('spatial resolution',landsat8.first().select('B4').projection().nominalScale().getInfo())
print('bands',landsat8.first().bandNames().getInfo())

### Sentinel 2 - Py6S

In [None]:
# #Sentinel 2 level 1C harmonized
# sentinel2 = ee.ImageCollection("projects/ee-curuai/assets/Py6S/S2/S2_py6s")\
#             .map(lambda img: img.set({
#                 'system:time_start':ee.ImageCollection("COPERNICUS/S2_HARMONIZED").filter(ee.Filter.eq('GRANULE_ID',img.get('GRANULE_ID'))).first().get('system:time_start'),
#                 'CLOUD_COVER':img.get('CLOUDY_PIXEL_PERCENTAGE')}))\
#             .filterDate(initial_date,end_date)\
#             .select(['B2', 'B3', 'B4', 'B8', 'B11', 'B12'])
# print(ee.Date(sentinel2.first().get('system:time_start')).format().getInfo())
# print(ee.Date(sentinel2.sort('system:time_start',False).first().get('system:time_start')).format().getInfo())

In [None]:
# print('collection size',sentinel2.size().getInfo())
# print('projection',sentinel2.first().select('B4').projection().getInfo())
# print('spatial resolution',sentinel2.first().select('B4').projection().nominalScale().getInfo())
# print('bands',sentinel2.first().bandNames().getInfo())

# Transform into remote sensing reflectance and apply sunglint correction

In [None]:
# Divide the corrected image by pi:
#   Rrs_sat_ac = Rsat_ac / pi
#
# Then apply the deglint:
#   Rrs_sat_ac_deglint(VNIR) = Rrs_sat_ac(VNIR) - Rrs_sat_ac(SWIR)
# (correction from the INPE Curuai paper)
def deglint(img):
    """Return `img` divided by pi with its `swir1` band subtracted from every band.

    `img` must carry the band names 'blue', 'green', 'red', 'nir', 'swir1'
    and 'swir2'. The `system:time_start`, `CLOUD_COVER` and `system:index`
    properties of `img` are copied onto the result.
    """
    band_names = ['blue', 'green', 'red', 'nir', 'swir1', 'swir2']
    kept_properties = ['system:time_start', 'CLOUD_COVER', "system:index"]

    rrs = img.divide(math.pi)
    glint_signal = rrs.select('swir1')
    corrected = rrs.select(band_names).subtract(glint_signal)

    return corrected.copyProperties(img, kept_properties)


## Standardize band names

In [None]:
# Common band names given, in order, to the six bands selected from each sensor.
name_bands = ['blue','green','red','nir','swir1','swir2']

### Landsat 7

In [None]:
# Rename the Landsat 7 bands to the common names, apply deglint, and show the first image's metadata.

ld7 = landsat7.map(lambda img: img.rename(name_bands)).map(deglint)
display(ld7.first().getInfo())

### Landsat 8

In [None]:
# Rename the Landsat 8 bands to the common names, apply deglint, and show the first image's metadata.
ld8 = landsat8.map(lambda img: img.rename(name_bands)).map(deglint)
display(ld8.first().getInfo())

### Sentinel 2

In [None]:
# s2 = sentinel2.map(lambda img: img.rename(name_bands)).map(deglint)
# s2.first()

### Visualize

In [None]:
# Interactive map centered on the field points (zoom level 10).
Map = geemap.Map(basemap='HYBRID')
Map.centerObject(roi_points,10)
# Most recent image of each deglinted collection.
imgld7 = ld7.sort('system:time_start',False).first()
imgLD8 = ld8.sort('system:time_start',False).first()
# imgS2 = s2.sort('CLOUD_COVER').first()
# True-colour layers, each named after the image's system:index.
Map.addLayer(imgld7, {'bands':['red','green','blue'], 'min':0,'max':0.02}, str(imgld7.get('system:index').getInfo()))
Map.addLayer(imgLD8, {'bands':['red','green','blue'], 'min':0,'max':0.02}, str(imgLD8.get('system:index').getInfo()))
# Map.addLayer(imgS2, {'bands':['red','green','blue'], 'min':0,'max':0.03}, str(imgS2.get('system:index').getInfo()))

Map.addLayer(roi_points, {'color':'darkred'}, 'Data Points');
# Map.addLayer(roi_poly, {'color':'darkred'}, 'Data polygon');
Map

# Sample data points pixel values

Filter the images acquired within 16 days before or after each field point's date.

In [None]:
def imgs_points(collection):
    """Build a mapper that pairs one field point with the images around its date.

    The returned function takes a feature and returns an ImageCollection of at
    most 10 images from `collection` that intersect the feature's geometry and
    are dated between 16 days before and 16 days after the feature's
    `system:time_start` (both bounds formatted as 'yyyy-MM-dd'). Each image
    gets a `dif_date_point` property (image date minus point date, in days)
    and the feature's `ID`.
    """
    def wrap(feat):
        date_point = ee.Date(feat.get('system:time_start'))
        window_start = date_point.advance(-16, 'day').format('yyyy-MM-dd')
        window_end = date_point.advance(16, 'day').format('yyyy-MM-dd')

        def tag_image(img):
            # Signed offset between the image acquisition and the field sample.
            offset = ee.Date(img.get('system:time_start')).difference(date_point, 'day')
            return img.set({'dif_date_point': offset}).copyProperties(feat, ['ID'])

        nearby = (
            collection
            .filterDate(window_start, window_end)
            .filterBounds(feat.geometry())
            .map(tag_image)
        )
        return ee.ImageCollection(nearby.limit(10))

    return wrap

Obtain statistics at the location of each field point using a 3 × 3 pixel window, and discard samples with fewer than 4 valid (unmasked) pixels.

In [None]:
# Projection of the first deglinted Landsat 8 image; sample_point takes its CRS
# and nominal scale for every reduction (it is applied to the Landsat 7 images too).
prj = ld8.first().projection()
prj

In [None]:
def sample_point(img):
  """Return the field point paired with `img`, annotated with window statistics.

  The point is looked up in `roi_points` by the image's `ID`. The window is
  the bounding box of the point buffered by 45; statistics are computed at
  the scale and CRS of `prj`. The returned feature carries the image
  metadata, the mean / median / min / max / stdDev of the blue, green, red
  and nir bands, and `count_pixel` (the pixel count of the red band).
  """
  feat = roi_points.filter(ee.Filter.eq('ID', img.get('ID'))).first()
  geom = feat.geometry().buffer(45).bounds()

  def reduce_window(reducer):
    # Every statistic uses the same window, scale and CRS.
    return img.reduceRegion(geometry=geom,
                            scale=prj.nominalScale(),
                            crs=prj.crs(),
                            reducer=reducer)

  mean = reduce_window(ee.Reducer.mean())
  median = reduce_window(ee.Reducer.median())
  min_max = reduce_window(ee.Reducer.minMax())
  count = reduce_window(ee.Reducer.count())
  std_dev = reduce_window(ee.Reducer.stdDev())

  properties = {
      "system_index": img.get('system:index'),
      'CLOUD_COVER': img.get('CLOUD_COVER'),
      'img_date': ee.Date(img.get('system:time_start')).format(),
      'dif_date_point': img.get('dif_date_point'),
      "count_pixel": count.get('red'),
  }
  for band in ('blue', 'green', 'red', 'nir'):
    properties[band + '_mean'] = mean.get(band)
    properties[band + '_median'] = median.get(band)
    # minMax names its outputs '<band>_min' and '<band>_max'.
    properties[band + '_min'] = min_max.get(band + '_min')
    properties[band + '_max'] = min_max.get(band + '_max')
    properties[band + '_stdDev'] = std_dev.get(band)

  return feat.set(properties)

# Keep only samples whose count_pixel (pixel count of the red band in the window) is greater than 3.
filter_count = ee.Filter.gt('count_pixel',3)




### Landsat 7

In [None]:
# Pair every field point with its nearby Landsat 7 images and flatten the result into one ImageCollection.
# NOTE(review): toList(2000) caps the result at 2000 image-point pairs; any pair beyond that is silently dropped.
img_pointsLD7 = ee.ImageCollection(roi_points.map(imgs_points(ld7)).flatten().toList(2000))

In [None]:
# Number of Landsat 7 image-point pairs.
display(img_pointsLD7.size().getInfo())


In [None]:
# Number of distinct field points (IDs) that have at least one Landsat 7 image.
display(img_pointsLD7.aggregate_count_distinct('ID').getInfo())

In [None]:
# Preview the metadata of the first five Landsat 7 image-point pairs.
display(img_pointsLD7.limit(5).getInfo())

In [None]:
# Compute the window statistics for every Landsat 7 pair and drop the samples rejected by filter_count.
reduced_LD7 = ee.FeatureCollection(img_pointsLD7.map(sample_point)).filter(filter_count)

In [None]:
# Preview the first two sampled Landsat 7 features.
display(reduced_LD7.limit(2).getInfo())

In [None]:
# Number of Landsat 7 samples remaining after the pixel-count filter.
display(reduced_LD7.size().getInfo())

In [None]:
# Number of distinct field points (IDs) remaining in the Landsat 7 samples.
display(reduced_LD7.aggregate_count_distinct('ID').getInfo())

### Landsat 8

In [None]:
# Pair every field point with its nearby Landsat 8 images and flatten the result into one ImageCollection.
# NOTE(review): toList(2000) caps the result at 2000 image-point pairs; any pair beyond that is silently dropped.
img_pointsLD8 = ee.ImageCollection(roi_points.map(imgs_points(ld8)).flatten().toList(2000))

In [None]:
# Number of Landsat 8 image-point pairs.
display(img_pointsLD8.size().getInfo())


In [None]:
# Number of distinct field points (IDs) that have at least one Landsat 8 image.
display(img_pointsLD8.aggregate_count_distinct('ID').getInfo())

In [None]:
# Preview the metadata of the first five Landsat 8 image-point pairs.
display(img_pointsLD8.limit(5).getInfo())

In [None]:
# Compute the window statistics for every Landsat 8 pair and drop the samples rejected by filter_count.
reduced_LD8 = ee.FeatureCollection(img_pointsLD8.map(sample_point)).filter(filter_count)

In [None]:
# Number of Landsat 8 samples remaining after the pixel-count filter.
display(reduced_LD8.size().getInfo())


In [None]:
# Number of distinct field points (IDs) remaining in the Landsat 8 samples.
display(reduced_LD8.aggregate_count_distinct('ID').getInfo())

In [None]:
# Preview the first two sampled Landsat 8 features.
display(reduced_LD8.limit(2).getInfo())

### Sentinel 2

In [None]:
# img_pointsS2 = ee.ImageCollection(roi_points.map(imgs_points(s2)).flatten().toList(2000))

In [None]:
# img_pointsS2.size()

In [None]:
# img_pointsS2.aggregate_count_distinct('ID')

In [None]:
# reduced_S2 = ee.FeatureCollection(img_pointsS2.map(sample_point)).filter(filter_count)

In [None]:
# reduced_S2.size()

In [None]:
# reduced_S2.aggregate_count_distinct('ID')

In [None]:
# reduced_S2.limit(2)

# Export data as CSV file

In [None]:
# Export the tables
# Landsat 7 samples -> CSV file on Google Drive
geemap.ee_to_csv(reduced_LD7, '/content/drive/MyDrive/CURUAI_PROCESS/py6s_LD7_data.csv')

In [None]:
# Landsat 8 samples -> CSV file on Google Drive
geemap.ee_to_csv(reduced_LD8, '/content/drive/MyDrive/CURUAI_PROCESS/py6s_LD8_data.csv')

In [None]:
# geemap.ee_to_csv(reduced_S2, '/content/drive/MyDrive/CURUAI_PROCESS/py6s_S2_data.csv')