<a href="https://colab.research.google.com/github/kavyajeetbora/CityHealthMonitor/blob/master/notebooks/xee_fundamentals.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [40]:
!pip install --upgrade -q xee
!pip install -q xarray[complete]
!pip install -q spyndex

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/727.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.2/727.5 kB[0m [31m2.6 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━[0m [32m327.7/727.5 kB[0m [31m4.6 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━[0m [32m573.4/727.5 kB[0m [31m5.4 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m727.5/727.5 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.7/134.7 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.7/224.7 kB[0m [31m7.1 MB/s[0m eta

In [11]:
import ee
import xarray

ee.Authenticate()
ee.Initialize(project='kavyajeetbora-ee', opt_url='https://earthengine-highvolume.googleapis.com')

## Sential to xarray

- Download the sentinal image collection
- clip it by area of interest
- clean the images: remove cloud cover, cloud shadows etc
- filter by dates

### Select the area of interest

In [13]:
admin = ee.FeatureCollection('FAO/GAUL_SIMPLIFIED_500m/2015/level2')

sonitpur = admin.filter(ee.Filter.eq('ADM1_NAME', 'Assam'))\
.filter(ee.Filter.eq('ADM2_NAME', 'Sonitpur'))

geometry = sonitpur.geometry()

### Filter the image collection

In [169]:
s2 = ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')

def cloud_free_sentinel(image):
    scl = image.select('SCL')
    mask = scl.eq(3).Or(scl.gte(7).And(scl.lte(10)))
    return image.updateMask(mask.eq(0))

filtered_img = s2.filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE',10))\
.filter(ee.Filter.date('2017-01-01','2024-04-01'))\
.filter(ee.Filter.bounds(geometry))\
.map(cloud_free_sentinel)

filtered_img.size().getInfo()

402

### Calculate the indices

In [170]:
def calculate_indices(image):
    '''
    Calculate the NDVI and NDWI for given image and add them to the bands
    '''
    ndvi = image.normalizedDifference(['B8','B4']).rename("NDVI")
    ndwi = image.normalizedDifference(['B8','B11']).rename('NDWI')
    image = image.addBands(ndvi).addBands(ndwi)
    return image.select(['NDWI', "NDVI"])

ic = filtered_img.map(calculate_indices)
ic.first().bandNames().getInfo()

['NDWI', 'NDVI']

### Aggregate the images

By Month, we can also do by year or week etc

In [171]:
ic.size().getInfo()

402

In [173]:
ic_list = ic.toList(ic.size().getInfo())
i = 63
img_i = ee.Image(ic_list.get(i))
ee.Date(img_i.get('system:time_start')).get('year').getInfo()

2019

In [195]:
def extract_month(image):
    '''Extract the year from a given image'''
    year = ee.Date(image.get('system:time_start')).get('year')
    return image.set('year', year)

def aggregate_images(year):
    '''Aggregate the image collection by year'''
    monthImages = ic.filter(ee.Filter.calendarRange(year, year, 'year'))
    monthImage = monthImages.median()
    return monthImage.set('year', year)

In [196]:
groups = ic.map(extract_month).aggregate_array('year').distinct().map(aggregate_images)
groups.length().getInfo()

7

Indexing the 4th image from the aggregated image collection

In [201]:
groups.get(3).getInfo()

{'type': 'Image',
 'bands': [{'id': 'NDWI',
   'data_type': {'type': 'PixelType',
    'precision': 'float',
    'min': -1,
    'max': 1},
   'crs': 'EPSG:4326',
   'crs_transform': [1, 0, 0, 0, 1, 0]},
  {'id': 'NDVI',
   'data_type': {'type': 'PixelType',
    'precision': 'float',
    'min': -1,
    'max': 1},
   'crs': 'EPSG:4326',
   'crs_transform': [1, 0, 0, 0, 1, 0]}],
 'properties': {'year': 2021}}

Convert the aggregated images from `ee.List` to `ee.ImageCollection`

In [202]:
agg_image_collection = ee.ImageCollection(groups)
agg_image_collection.size().getInfo()

7

### Convert the image collection to Xarray Dataset

In [203]:
agg_image_collection.first().getInfo()

{'type': 'Image',
 'bands': [{'id': 'NDWI',
   'data_type': {'type': 'PixelType',
    'precision': 'float',
    'min': -1,
    'max': 1},
   'crs': 'EPSG:4326',
   'crs_transform': [1, 0, 0, 0, 1, 0]},
  {'id': 'NDVI',
   'data_type': {'type': 'PixelType',
    'precision': 'float',
    'min': -1,
    'max': 1},
   'crs': 'EPSG:4326',
   'crs_transform': [1, 0, 0, 0, 1, 0]}],
 'properties': {'year': 2018, 'system:index': '0'}}

In [179]:
ds = xarray.open_dataset(agg_image_collection, engine='ee', crs='EPSG:4326', scale=0.3)
size_mb = ds.nbytes/(1024**2)
print(f"Total size of the xarray dataset: {size_mb:.2f} MB")
ds



Total size of the xarray dataset: 38.47 MB


In [57]:
ds.dims, ds.coords

(Frozen({'time': 64, 'lon': 1200, 'lat': 600}),
 Coordinates:
   * time     (time) datetime64[ns] 2019-01-02T04:41:01 ... 2019-12-30T04:31:0...
   * lon      (lon) float64 -179.8 -179.6 -179.2 -178.9 ... 179.2 179.5 179.8
   * lat      (lat) float64 -89.85 -89.55 -89.25 -88.95 ... 89.25 89.55 89.85)

- Next we will find the composite for each year
- The total size of the dataset is huge, that will also reduce the total size

## Summarize/Aggregate the data

In [58]:
ds.groupby('time.year').median()

EEException: Total request size (62914560 bytes) must be less than or equal to 50331648 bytes.

Total size of the xarray dataset: 6.72
