<a href="https://colab.research.google.com/github/jshogland/SpatialModelingTutorials/blob/main/Notebooks/CostaRica.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install geemap

In [None]:
!pip install geemap

# Import packages

In [None]:
#import packages
import geopandas as gpd, pandas as pd, os, numpy as np
import ee, geemap

## Authenticate and Initialize the project

In [None]:
# Trigger the Earth Engine authentication flow.
ee.Authenticate()

# Initialize the Earth Engine client library for a specific cloud project.
ee.Initialize(project='ee-jshogland') # NOTE: replace 'ee-jshogland' with your own personal cloud project

# Read the shape file into a geodataframe
## Display CRS and column names

### If running on Colab you will need to upload the Clasification_Plots.zip file

In [None]:
# Read the zipped plot file into a geodataframe
gdf=gpd.read_file('Clasification_Plots.zip')
# Show the coordinate reference system and the available column names
display(gdf.crs)
display(gdf.columns)

## Subset the geodataframe to the columns we are interested in and display the dataframe

In [None]:
# Names of the columns to keep (the geometry column is kept so the result stays spatial)
k_clms = ['plotid','sampleid','Uso','Cobertura','Vegetacion','Herbaceas', 'Pasto_Arb', 'Cultivo','Humedal', 'Terreno','Agua','Otra_clase','SAF','Cambios15_','Gana_Perdi','geometry']
gdf_s=gdf[k_clms]
# Last expression of the cell: the notebook renders the subset
gdf_s


## Display summary statistics

In [None]:
# Summary of the numeric columns of the subset
display(gdf_s.describe())
# Summary of the text (object) columns of the SAME subset; the original
# summarised the full, unsubset geodataframe here, which did not match the
# columns selected in the previous cell
display(gdf_s.describe(include='object'))

## Make GeoSeries of the study area and create convex hull

In [None]:
# Union all plot geometries, take the convex hull of that union, and store it as a one-element GeoSeries in the plots' CRS
chul=gpd.GeoSeries(gdf_s.unary_union.convex_hull,crs=gdf_s.crs)

## Create definitions for the median and medoid procedures

In [None]:
def maskL8sr(image):
    '''
    Masks flagged pixels and rescales the optical and thermal bands of an image
    image = (ee image object) image with QA_PIXEL, QA_RADSAT, SR_B* and ST_B* bands

    returns the image with rescaled SR_B*/ST_B* bands and both masks applied
    '''
    # QA_PIXEL bits that must all be zero for a pixel to be kept:
    #   bit 0 fill, bit 1 dilated cloud, bit 2 cirrus, bit 3 cloud, bit 4 cloud shadow
    flagged_bits = 0b11111
    clear = image.select('QA_PIXEL').bitwiseAnd(flagged_bits).eq(0)
    # Keep only pixels where QA_RADSAT is zero
    unsaturated = image.select('QA_RADSAT').eq(0)

    # Rescale the stored values with a multiplier and an offset per band group
    reflectance = image.select('SR_B.').multiply(0.0000275).add(-0.2)
    temperature = image.select('ST_B.*').multiply(0.00341802).add(149.0)

    # Overwrite the raw bands with the rescaled ones, then apply the masks
    rescaled = image.addBands(reflectance, overwrite=True)
    rescaled = rescaled.addBands(temperature, overwrite=True)
    return rescaled.updateMask(clear).updateMask(unsaturated)

def median_mosaic(image,fltr=None,refl_bands=None):
    '''
    Builds a per-band median composite from an image collection
    image = (ee image collection) collection to composite
    fltr = (ee filter, optional) filter applied to the collection first; None means no filtering
    refl_bands = (list[string], optional) bands to composite; defaults to
                 ['BLUE', 'GREEN', 'RED', 'NIR', 'SWIR1', 'SWIR2']

    returns the median of the selected bands
    '''
    if refl_bands is None:
        refl_bands = ['BLUE', 'GREEN', 'RED', 'NIR', 'SWIR1', 'SWIR2']

    # Only filter when a filter was supplied; the original passed None
    # straight to filter() because both branches were identical
    inCollection = image if fltr is None else image.filter(fltr)

    return inCollection.select(refl_bands).median()

def _medoid(col, median=None):
    '''
    Scores one image by its squared distance from a reference median image
    col = (ee image object) image to score
    median = (ee image object, optional) reference image, normally the per-band
             median of the whole collection the image belongs to

    returns an image whose first band is the sum over bands of the squared
    differences, followed by the bands of col
    '''
    if median is None:
        # Fallback kept for single-argument callers. NOTE: this builds a
        # collection from the single image, so the reference equals the image
        # and the distance is zero; pass the collection median to get a
        # meaningful score.
        median = ee.ImageCollection(col).median()
    diff=ee.Image(col).subtract(median).pow(ee.Image.constant(2))
    return diff.reduce('sum').addBands(col)


def medoid_mosaic(image, fltr=None, refl_bands=None):
    '''
    Builds a medoid composite: for each pixel, the band values of the image
    that is closest (sum of squared band differences) to the collection median
    image = (ee image collection) collection to composite
    fltr = (ee filter, optional) filter applied to the collection first; None means no filtering
    refl_bands = (list[string], optional) bands used for the distance and returned;
                 defaults to ['BLUE', 'GREEN', 'RED', 'NIR', 'SWIR1', 'SWIR2']

    returns an image with the bands named in refl_bands
    '''
    if refl_bands is None:
        refl_bands = ['BLUE', 'GREEN', 'RED', 'NIR', 'SWIR1', 'SWIR2']

    # Only filter when a filter was supplied (both original branches filtered)
    inCollection = image if fltr is None else image.filter(fltr)
    inCollection = inCollection.select(refl_bands)

    # The reference must be the median of the WHOLE collection. Computing it
    # inside the mapped function, from one image at a time, makes every
    # distance zero.
    median = inCollection.median()
    scored = inCollection.map(lambda img: _medoid(img, median))

    # A min reducer with n+1 inputs picks, per pixel, the record whose first
    # input (the distance) is smallest and carries the other n inputs along.
    n_bands = len(refl_bands)
    closest = ee.ImageCollection(scored).reduce(ee.Reducer.min(n_bands + 1))

    # Drop the distance band (index 0) and restore the band names
    return closest.select(list(range(1, n_bands + 1)), refl_bands)



## Set various variables and create the medoid surface on EE

In [None]:
#make lists of band names for selections
lc8_bands = ['SR_B2', 'SR_B3', 'SR_B4', 'SR_B5', 'SR_B6', 'SR_B7', 'ST_B10', 'QA_PIXEL']#landsat band names
tgt_bands = ['BLUE', 'GREEN', 'RED', 'NIR', 'SWIR1', 'SWIR2', 'TEMP', 'QA_PIXEL']#common band names (same order as lc8_bands)
refl_bands = ['BLUE', 'GREEN', 'RED', 'NIR', 'SWIR1', 'SWIR2']#bands we care about

#specify start and end dates for the image filter
startDate = '2021-01-01'
endDate = '2024-07-01'

#Specify julian dates for filter. Here we want to select sunny months
#Two ranges are used because the season wraps around the end of the year
julianStart1 = 350# Starting Julian Date (for landsat median cloud free )
julianEnd1 = 365# NOTE(review): day 366 of leap years falls outside this range - confirm that is acceptable
julianStart2 = 1
julianEnd2 = 150# Ending Julian date (for landsat median cloud free)

#define the study area extent from our convex hull
#(disabled: no spatial bounds filter is applied below)
#geo=geemap.gdf_to_ee(gpd.GeoDataFrame(geometry=chul)) #convert our convex hull into a ee feature class object

#make the ee collection
l8_col=ee.ImageCollection('LANDSAT/LC08/C02/T1_L2')

#set various filters
#f_bnds=ee.Filter.bounds(geometry=geo)
f_date=ee.Filter.date(startDate,endDate)
f_cr1=ee.Filter.calendarRange(julianStart1,julianEnd1)
f_cr2=ee.Filter.calendarRange(julianStart2,julianEnd2)
#an image passes if it is inside the date window AND inside either julian range
f_or=ee.Filter.Or(f_cr1,f_cr2)
f_and=ee.Filter.And(f_date,f_or)

#use our filter on the landsat collection, then mask and rescale every image
l8=l8_col.filter(f_and).map(maskL8sr)
#select the landsat bands and rename them to the common band names
l8r=l8.select(lc8_bands,tgt_bands)

#call the medoid function
#NOTE: f_and is passed again here although l8r was already filtered with it above
medoid = medoid_mosaic(l8r,fltr=f_and,refl_bands=refl_bands)

## Create a generic Map using geemap and then map the DEM and Medoid surface

In [None]:
# Create the interactive map; the layers are added to this same object in the next cell
Map = geemap.Map()
# Last expression of the cell: the notebook renders the map widget
Map

### Add the EE layers to the map

In [None]:
# get the dem from EE
dem = ee.Image("USGS/SRTMGL1_003")

# Set visualization parameters for the DEM layer (value range and color palette).
vis_params = {
    "min": 0,
    "max": 4000,
    "palette": ["006633", "E5FFCC", "662A00", "D8D8D8", "F5F5F5"],
}

#Add Earth Engine layers to Map
#Medoid composite shown as RED/GREEN/BLUE, stretched between -0.02 and 0.3
Map.addLayer(
    medoid, {"bands": ["RED", "GREEN", "BLUE"],
             'min':-0.02,
             'max':0.3
    },
    "Landsat 8",
)

#DEM layer; NOTE(review): the trailing positional arguments (True, 1) are presumably "shown" and "opacity" - confirm against the geemap signature
Map.addLayer(dem, vis_params, "SRTM DEM", True, 1)

## Extract DEM and Medoid pixel values

### Due to memory limitations on EE, we will need to subset our data and send multiple requests
#### Let's start with making a function to handle splitting up the data

In [None]:
def get_tiles(gdf,ntiles):
    '''
    Builds square tiles that cover the features of a geodataframe
    gdf = (geodataframe) features to be covered by tiles
    ntiles = (int) number of tiles the convex hull area is divided by to size each tile;
             the number of tiles returned can differ because the grid is laid over
             the bounding box and then filtered

    returns a GeoSeries of square polygons (in the crs of gdf) that intersect the features
    raises ValueError if ntiles is less than 1
    '''
    if ntiles < 1:
        raise ValueError('ntiles must be at least 1')

    # Dissolve the features once; the union is needed for the hull and for
    # the final intersection test (the original computed it twice)
    union=gdf.unary_union
    chul=gpd.GeoSeries(union.convex_hull,crs=gdf.crs)
    xmin,ymin,xmax,ymax=chul.total_bounds

    # Tile edge length such that ntiles squares have the same area as the hull
    sp=np.sqrt(chul.area.iloc[0]/ntiles)
    sp2=sp/2

    # Tile centers on a regular grid that starts half a tile outside the bounds
    xs=np.arange(xmin-sp2,xmax+sp2,sp)
    ys=np.arange(ymin-sp2,ymax+sp2,sp)
    xv, yv = np.meshgrid(xs, ys)
    pnts = gpd.GeoSeries(gpd.points_from_xy(x=xv.flatten(), y=yv.flatten()),crs=gdf.crs)

    # A square buffer of half the edge length turns each center into a tile
    buff = pnts.buffer(sp2,cap_style='square')

    # Keep only the tiles that touch the features
    return buff[buff.intersects(union)]

def extract_data(gdf,img,ntiles,stats='FIRST',scale=30):
    '''
    Extracts image values for the features of a geodataframe, one tile of features per EE request
    gdf = (geodataframe) features used to extract values
    img = (ee image object) ee image to extract values from
    ntiles = (int) number of tiles passed to get_tiles to split up the requests
    stats = (string) name of the ee statistic (e.g., FIRST, MEAN, MAX, MIN, MEDIAN, etc.)
    scale = (number) scale passed to the extraction call

    returns a copy of gdf with the extracted values added as columns; records
    of a tile whose request failed are left unfilled and the error is printed
    '''
    result=gdf.copy()
    for tile in get_tiles(gdf,ntiles):
        in_tile=result.intersects(tile)
        pts=result[['geometry']][in_tile]
        # a failure in one tile is reported and the remaining tiles are still processed
        try:
            # geodataframe -> ee features -> extracted values -> geodataframe without geometry
            extracted=geemap.extract_values_to_points(
                geemap.gdf_to_ee(pts),img,stats_type=stats,scale=scale
            )
            values=geemap.ee_to_gdf(extracted).drop(['geometry'],axis=1)
            # values are written back by position into the records of this tile
            result.loc[in_tile,values.columns]=values.values
        except Exception as err:
            print('Error: ',err)

    return result


### Let's extract our dem and medoid data

In [None]:
#get dem values
dem_tbl=extract_data(gdf_s,dem,100) #no reducers can take bigger tiles (less ram on server to process)
#get medoid values
medoid_tbl=extract_data(gdf_s,medoid,500) #need to increase tiles to account for reducers (ram limits on server)
#show both result tables
display(dem_tbl)
display(medoid_tbl)
#How many calls to EE?
#How many calls to EE?


## Let's create our other predictor surfaces (pred2)

In [None]:
# get NDVI from medoid: normalized difference of the NIR and RED bands
ndvi = medoid.normalizedDifference(['NIR', 'RED']).rename('ndvi')

#evi
evi = medoid.expression('2.5 * ((NIR - RED) / (NIR + 6 * RED - 7.5 * BLUE + 1))',
                        {
                            'NIR': medoid.select('NIR'),
                            'RED': medoid.select('RED'),
                            'BLUE': medoid.select('BLUE')
                        }).rename('evi')

#savi (the .5 in the expression is the adjustment term)
savi = medoid.expression('(NIR - RED) / (NIR + RED + .5) * (1 + .5)',
                         {
                             'NIR': medoid.select('NIR'),
                             'RED': medoid.select('RED')
                         }).rename('savi')


#diff index: NIR minus RED
diff = medoid.select('NIR').subtract(medoid.select('RED')).rename('diff')

#Tasseled cap
#One weight per band, in the order Blue, Green, Red, NIR, SWIR1, SWIR2
coefficients = {
  'brightness': [0.3029, 0.2786, 0.4733, 0.5599, 0.508, 0.1872],
  'greenness': [-0.2941, -0.243, -0.5424, 0.7276, 0.0713, -0.1608],
  'wetness': [0.1511, 0.1973, 0.3283, 0.3407, -0.7117, -0.4559],
}

#Calculate Tasseled Cap The band order is Blue, Green, Red, NIR, SWIR1, SWIR2.
#Each component is the sum over bands of band value times its weight
brightness = medoid.select(['BLUE', 'GREEN', 'RED', 'NIR', 'SWIR1', 'SWIR2']).multiply(coefficients['brightness']).reduce('sum').rename('brightness')
greenness = medoid.select(['BLUE', 'GREEN', 'RED', 'NIR', 'SWIR1', 'SWIR2']).multiply(coefficients['greenness']).reduce('sum').rename('greenness')
wetness = medoid.select(['BLUE', 'GREEN', 'RED', 'NIR', 'SWIR1', 'SWIR2']).multiply(coefficients['wetness']).reduce('sum').rename('wetness')

#Elevation and derivatives
# Calculate terrain layers
slope = ee.Terrain.slope(dem)
aspect = ee.Terrain.aspect(dem).rename('aspect')

# Aspect transforms
# unitScale(-180, 180) rescales a value x to (x + 180) / 360; kept unchanged
# because 'aspectdeg' is one of the stacked predictors.
# NOTE(review): confirm this rescaling is the intended 'aspectdeg' predictor.
aspectDeg = aspect.unitScale(-180, 180).rename('aspectdeg')
# cos() and sin() take radians, so convert the aspect from degrees first.
# The original applied them to the rescaled 'aspectdeg' values, which loses
# the circular nature of aspect.
aspectRad = aspect.multiply(np.pi / 180)
cosAspect = aspectRad.cos().rename('aspectcos')
sinAspect = aspectRad.sin().rename('aspectsin')

#Canopy Height
altura2 = ee.Image('users/nlang/ETH_GlobalCanopyHeight_2020_10m_v1').rename(['altura2'])

#Height above nearest drainage (collection mosaicked into a single image)
hand30_100 = ee.ImageCollection("users/gena/global-hand/hand-100").mosaic().rename(['hand30_100'])

#soils
#Each soil image is unmasked to 0, its bands are multiplied by the weights
#[.05,.10,.15,.30,.40,0] and summed into one band.
#NOTE(review): the weights presumably correspond to depth-layer thickness over the top 1 m, with the last layer excluded - confirm against the dataset's band order
clay_1mMed = ee.Image("projects/soilgrids-isric/clay_mean").unmask(0).multiply([.05,.10,.15,.30,.40,0]).reduce('sum').rename('clay_1mMed')
sand_1mMed = ee.Image("projects/soilgrids-isric/sand_mean").unmask(0).multiply([.05,.10,.15,.30,.40,0]).reduce('sum').rename('sand_1mMed')
silt_1mMed = ee.Image("projects/soilgrids-isric/silt_mean").unmask(0).multiply([.05,.10,.15,.30,.40,0]).reduce('sum').rename('silt_1mMed')
ocs_1mMed = ee.Image("projects/soilgrids-isric/ocs_mean").unmask(0).multiply([.05,.10,.15,.30,.40,0]).reduce('sum').rename('ocs_1mMed')

#LAI
#Same date/julian filter (f_and) as the Landsat collection; median over time, scaled by 0.01, unmasked to 0
wgs_500m_8d = ee.ImageCollection("projects/sat-io/open-datasets/BU_LAI_FPAR/wgs_500m_8d")
fparProc = wgs_500m_8d.filter(f_and).median().select('FPAR').multiply(0.01).unmask(0).rename('fpar')
laiProc = wgs_500m_8d.filter(f_and).median().select('LAI').multiply(0.01).unmask(0).rename('lai')

#Topographic indices (a constant offset is added to each)
topDIV = ee.Image('CSP/ERGo/1_0/Global/SRTM_topoDiversity').add(1323.63).rename('topDiv')
mTPI = ee.Image("CSP/ERGo/1_0/Global/SRTM_mTPI").add(8129).rename('mTPI')

#make a list of predictors
#NOTE(review): greenness is computed in this cell but is not in this list, while brightness and wetness are - confirm the omission is intentional
pred2_lst=[medoid,dem,savi,diff,evi,brightness,wetness,ndvi,slope,aspect,
           aspectDeg,cosAspect,sinAspect,altura2,clay_1mMed,sand_1mMed,silt_1mMed,
           ocs_1mMed,fparProc,laiProc,hand30_100,topDIV,mTPI]

# let's combine our pred2 surfaces into one Raster (one multi-band ee image built from the list)
pred2=ee.Image(pred2_lst)

### Now let's extract pred2 values
#### We are extracting all predictor values this time, including medoid and elevation. This is redundant with the earlier extraction cell, meaning we did not need to run that cell; we could have run it all in one cell.

In [None]:
# Extract every band of pred2 at the plot locations: 500 tiles, 'FIRST' statistic, scale of 30
gdf_f=extract_data(gdf_s,pred2,500,'FIRST',scale=30)

### Let's look at our dataframe

In [None]:
# Last expression of the cell: the notebook renders the extracted geodataframe
gdf_f

## Is there any missing data?

In [None]:
# Number of missing values in each column
gdf_f.isna().sum()

## Save out the dataframe. If in Colab, don't forget to download.

In [None]:
#as a shape file
#NOTE(review): the shapefile format limits field names to 10 characters - confirm no column name is truncated on write
gdf_f.to_file('CostaRica_EE_data.shp')

#as a csv
gdf_f.to_csv('CostaRica_EE_data.csv')

## Let's visually look at all savi records that have nans by adding those locations to our map.

In [None]:
# Convert the records whose 'savi' value is missing into an ee feature collection
fc=geemap.gdf_to_ee(gdf_f[gdf_f['savi'].isna()])
# Draw those locations in yellow on the existing map
Map.addLayer(fc,name='SAVI NANs',vis_params={'color':'yellow'})