In [None]:
#default_exp sentinel2

# Catapult Sentinel-2 L2A

> Hardcoding and parsing of STAC *Item* and *Collection* metadata attributes for Sentinel-2 datasets prepared by Catapult (data already converted to COGs). Copernicus-generated L2A datasets were converted directly to COGs and, where L1C was available, the appropriate sen2cor version was applied.

**This nb takes care of both hardcoding and parsing of *Collection* and *Item*-specific metadata attributes.**

**See cs_stac_catalog nb for compilation of all catalogues and writing to public-eo-data. We envisage this second nb comprising tools for updating / appending new items to a *Collection*.**

In [None]:
#hide
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
#hide
from nbdev.showdoc import *

In [None]:
#export
import os
from glob import glob
import time
import numpy as np
from datetime import datetime
import json

import xmltodict
import pystac
from pystac import STAC_IO
from pystac.extensions.eo import Band
from pystac.extensions.sar import FrequencyBand, Polarization, ObservationDirection
import geopandas as gpd
import rasterio
# from shapely.geometry import Polygon

from sac_stac.utils import s3_list_objects_paths
from sac_stac.utils import pystac_setIO, create_uri, get_img_crs_and_bbox
from sac_stac.extensions import add_custom_extensions
add_custom_extensions() # needed to extend pystac extensions without formally adding
pystac_setIO() # use bespoke own IO 

set our own I/O for pystac

## **Hardcoded Attributes**

In [None]:
#export
# Band definitions attached at Asset level. common_name values are aligned to the
# STAC eo extension - https://github.com/radiantearth/stac-spec/tree/master/extensions/eo
# scl (scene classification) is added alongside the spectral bands.
# Each pair is (name, description); the name is reused as the common_name.
# Descriptions are still placeholders.
S2_BANDS = [
    Band.create(name=band_name, description=band_description, common_name=band_name)
    for band_name, band_description in (
        ('aot', 'TBD'),
        ('coastal', 'TBD'),
        ('blue', 'TBD'),
        ('green', 'TBD'),
        ('red', 'TBD'),
        ('nir', 'TBD'),
        ('nir08', 'TBD'),
        ('nir09', 'TBD'),
        ('swir16', 'TBD'),
        ('swir22', 'TBDm'),
        ('scene_classification', 'TBDm'),
    )
]
# Maps the band/resolution token found in an asset file name to the band name
# used in S2_BANDS, so file paths can be linked to bands when creating Assets.
# Insertion order is the order in which Assets are added to an Item.
S2_BAND_REFS = dict(
    AOT_10m='aot',
    B01_60m='coastal',
    B02_10m='blue',
    B03_10m='green',
    B04_10m='red',
    B08_10m='nir',
    B8A_20m='nir08',
    B09_60m='nir09',
    B11_20m='swir16',
    B12_20m='swir22',
    SCL_20m='scene_classification',
)
# Measurements formatted for the product_definition at Collection level and for ODC indexing.
# Every band shares units '1' and nodata 0; all are uint16 except the
# scene classification layer, which is uint8.
S2_MEASUREMENTS = [
    {
        'name': measurement_name,
        'units': '1',
        'dtype': 'uint8' if measurement_name == 'scene_classification' else 'uint16',
        'nodata': 0,
    }
    for measurement_name in (
        'aot',
        'coastal',
        'blue',
        'green',
        'red',
        'nir',
        'nir08',
        'nir09',
        'swir16',
        'swir22',
        'scene_classification',
    )
]
# Metadata section of the product_definition set at Collection level (used for ODC indexing).
S2_METADATA = {"product": {"name": "sentinel_2"}}

## **Representative Sample of *Collection* *Items***

get paths of representative *Items* and *Assets* associated with *Collection*

In [None]:
# object paths under the Fiji Sentinel-2 prefix; arguments are the bucket, then the
# prefix (the same order s2_create_collection uses)
obj_paths_list = s3_list_objects_paths('public-eo-data', 'common_sensing/fiji/sentinel_2/')

In [None]:
# get unique Item / scene names: the directory that directly contains each object
# (second-to-last component of the path)
item_names = list(np.unique([ i.split('/')[-2] for i in obj_paths_list ]))
item_names[:5]

['S2A_MSIL2A_20151002T222056_T01KAU',
 'S2A_MSIL2A_20151002T222056_T01KBU',
 'S2A_MSIL2A_20151022T222102_T01KAU',
 'S2A_MSIL2A_20151022T222102_T01KBU',
 'S2A_MSIL2A_20151114T223002_T60KWD']

## **Individual *Item* Example**

We can work with a single *Item* while developing the tools.

In [None]:
# use the first scene name as the worked example for the cells below
item_name = item_names[0]
item_name

'S2A_MSIL2A_20151002T222056_T01KAU'

and *Assets* associated with that *Item*

In [None]:
# keep every object path that contains the example item name (substring match)
item_obj_paths = [ i for i in obj_paths_list if item_name in i]
item_obj_paths[:2]

['common_sensing/fiji/sentinel_2/S2A_MSIL2A_20151002T222056_T01KAU/S2A_MSIL2A_20151002T222056_T01KAU_AOT_10m.tif',
 'common_sensing/fiji/sentinel_2/S2A_MSIL2A_20151002T222056_T01KAU/S2A_MSIL2A_20151002T222056_T01KAU_B01_60m.tif']

## ***Item* Creation & Metadata**

Functions to parse *Item*-level metadata from file naming conventions, existing metadata files or, where necessary, reading *Asset* metadata from image.

In [None]:
#export
def s2_get_dt(item_name):
    """Parse the timestamp embedded in a Sentinel-2 item name.

    The third underscore-separated field of ``item_name`` is read as an
    8-digit date, one separator character (skipped without being checked)
    and a 6-digit time, e.g. ``20151002T222056``.

    Returns a naive ``datetime`` (no tzinfo is attached).
    Raises ``IndexError`` if the name has fewer than three fields and
    ``ValueError`` if the remaining characters do not form a timestamp.
    """
    stamp = item_name.split('_')[2]
    date_digits, time_digits = stamp[:8], stamp[9:]
    return datetime.strptime(date_digits + time_digits, '%Y%m%d%H%M%S')

In [None]:
# datetime parsed from the timestamp field of the example item name
s2_get_dt(item_name)

datetime.datetime(2015, 10, 2, 22, 20, 56)

In [None]:
#export
def s2_get_spatial(img_uri):
    """Return bounding box, lon/lat footprint and CRS code for an image.

    Returns a tuple ``(bbox, geom, epsg_code)``:

    - ``bbox``: passed through unchanged from ``get_img_crs_and_bbox``
      (in the example output of this notebook these are projected
      coordinates, not lon/lat).
    - ``geom``: GeoJSON geometry dict of the footprint reprojected to EPSG:4326.
    - ``epsg_code``: string of the form ``'EPSG:<code>'``.
    """
    # The geometry from the helper is only the bbox outline, not a valid-data mask.
    bbox, native_outline, crs = get_img_crs_and_bbox(img_uri)

    # Drop the first five characters of the 'init' entry (presumably the
    # 'epsg:' prefix - confirm against get_img_crs_and_bbox) to keep the code.
    init_entry = crs.to_dict()['init']
    epsg_code = f"EPSG:{init_entry[5:]}"

    # Reproject the outline to lon/lat and pull the geometry out of the GeoJSON.
    outline_lonlat = gpd.GeoSeries([native_outline], crs=epsg_code).to_crs('EPSG:4326')
    feature_collection = json.loads(outline_lonlat.to_json())
    geom = feature_collection['features'][0]['geometry']

    return bbox, geom, epsg_code

In [None]:
# NOTE: hard-coded URL of a different scene (20151022T222102, tile T01KBU) than `item_name`
s2_get_spatial('https://s3-uk-1.sa-catapult.co.uk/public-eo-data/common_sensing/fiji/sentinel_2/S2A_MSIL2A_20151022T222102_T01KBU/S2A_MSIL2A_20151022T222102_T01KBU_B01_60m.tif')

([199980.0, 7790200.0, 309780.0, 7900000.0],
 {'type': 'Polygon',
  'coordinates': [[[-179.8664990658884, -19.961639731204293],
    [-179.84911627778376, -18.970593044236296],
    [-178.80690412376134, -18.983701491992505],
    [-178.81793922844454, -19.975488243224966],
    [-179.8664990658884, -19.961639731204293]]]},
 'EPSG:32701')

In [None]:
#export
def s2_create_item(item_name, item_obj_paths):
    """Create a pystac Item (without assets) for a single Sentinel-2 scene.

    Parameters
    ----------
    item_name : str
        Scene name; used as the Item id and parsed by ``s2_get_dt`` for the
        Item datetime.
    item_obj_paths : list of str
        Object paths belonging to the scene. The first path ending in
        ``.tif`` is used to derive the Item's spatial metadata.

    Returns
    -------
    pystac.Item
        Item with the 'projection' and 'eo' extensions enabled and the
        common metadata (gsd, platform, instruments, constellation) set.

    Raises
    ------
    IndexError
        If ``item_obj_paths`` contains no ``.tif`` path.
    """
    # assume the first image asset of the item is representative of its spatial footprint
    first_tif = next((p for p in item_obj_paths if p.endswith('.tif')), None)
    if first_tif is None:
        # same exception type as the bare [0] lookup would give, but naming the item
        raise IndexError(f"No .tif assets found for item {item_name!r}")
    bbox, geom, epsg = s2_get_spatial(create_uri(first_tif))

    # core
    # NOTE(review): bbox is passed through from s2_get_spatial; in this notebook's
    # example output it is in the image's projected coordinates while geometry is
    # lon/lat. STAC expects the Item bbox in WGS84 - confirm what downstream
    # consumers rely on before changing it.
    item = pystac.Item(id=item_name,
                      datetime=s2_get_dt(item_name),
                      geometry=geom,
                      bbox=bbox,
                      properties={})

    # extensions
    item.ext.enable('projection')
    item.ext.projection.epsg = int(epsg[5:])    # epsg is 'EPSG:<code>'; keep the numeric part

    item.ext.enable('eo')
    item.ext.eo.cloud_cover = 0.0    # placeholder - need to get from meta

    item.common_metadata.gsd = 10    # nominal asset resolution
    item.common_metadata.platform = 'sentinel_2'    # should diff S2 AvB?
    item.common_metadata.instruments = ['msi']
    item.common_metadata.constellation = 'sentinel_2'

    return item

In [None]:
# build the example Item (no assets yet) and validate it
example_item = s2_create_item(item_name, item_obj_paths)
example_item.validate()

In [None]:
# inspect the Item as a plain dict
example_item.to_dict()

{'type': 'Feature',
 'stac_version': '1.0.0-beta.2',
 'id': 'S2A_MSIL2A_20151002T222056_T01KAU',
 'properties': {'proj:epsg': 32701,
  'eo:cloud_cover': 0.0,
  'gsd': 10,
  'platform': 'sentinel-2',
  'instruments': ['msi'],
  'datetime': '2015-10-02T22:20:56Z'},
 'geometry': {'type': 'Polygon',
  'coordinates': [[[179.17926444633878, -19.943650180308413],
    [179.20240407672088, -18.95356468137259],
    [-179.75632222592535, -18.971998100113254],
    [-179.77314066061123, -19.963124106519743],
    [179.17926444633878, -19.943650180308413]]]},
 'links': [],
 'assets': {},
 'bbox': [99960.0, 7790200.0, 209760.0, 7900000.0],
 'stac_extensions': ['projection', 'eo']}

### **Adding *Assets* and *Asset* metadata to *Item***

Need to find the actual asset path for a given band. (Note that we typically store COGs per-band.)

In [None]:
#export
def s2_find_band_path(band_ref, item_obj_paths):
    """Return the single path in ``item_obj_paths`` that contains ``band_ref``.

    Matching is a plain substring test against the whole path.

    Parameters
    ----------
    band_ref : str
        Band/resolution token to look for, e.g. ``'B02_10m'``.
    item_obj_paths : list of str
        Object paths belonging to one Item.

    Raises
    ------
    Exception
        If more than one path matches.
    IndexError
        If no path matches.
    """
    matched_paths = [path for path in item_obj_paths if band_ref in path]
    if len(matched_paths) > 1:
        raise Exception(f"Found too many matches: {matched_paths}")
    if not matched_paths:
        # same exception type the bare [0] lookup used to raise, but with a useful message
        raise IndexError(f"No asset path found for band ref {band_ref!r}")
    return matched_paths[0]

In [None]:
# show, for each band reference, its band name and the object path it resolves to
for b_ref in S2_BAND_REFS:
    print(f"{b_ref} {S2_BAND_REFS[b_ref]}")
    print(s2_find_band_path(b_ref, item_obj_paths))

AOT_10m aot
common_sensing/fiji/sentinel_2/S2A_MSIL2A_20151002T222056_T01KAU/S2A_MSIL2A_20151002T222056_T01KAU_AOT_10m.tif
B01_60m coastal
common_sensing/fiji/sentinel_2/S2A_MSIL2A_20151002T222056_T01KAU/S2A_MSIL2A_20151002T222056_T01KAU_B01_60m.tif
B02_10m blue
common_sensing/fiji/sentinel_2/S2A_MSIL2A_20151002T222056_T01KAU/S2A_MSIL2A_20151002T222056_T01KAU_B02_10m.tif
B03_10m green
common_sensing/fiji/sentinel_2/S2A_MSIL2A_20151002T222056_T01KAU/S2A_MSIL2A_20151002T222056_T01KAU_B03_10m.tif
B04_10m red
common_sensing/fiji/sentinel_2/S2A_MSIL2A_20151002T222056_T01KAU/S2A_MSIL2A_20151002T222056_T01KAU_B04_10m.tif
B08_10m nir
common_sensing/fiji/sentinel_2/S2A_MSIL2A_20151002T222056_T01KAU/S2A_MSIL2A_20151002T222056_T01KAU_B08_10m.tif
B8A_20m nir08
common_sensing/fiji/sentinel_2/S2A_MSIL2A_20151002T222056_T01KAU/S2A_MSIL2A_20151002T222056_T01KAU_B8A_20m.tif
B09_60m nir09
common_sensing/fiji/sentinel_2/S2A_MSIL2A_20151002T222056_T01KAU/S2A_MSIL2A_20151002T222056_T01KAU_B09_60m.tif
B11_2

Then we can add each asset expected to be found and test with the example item taken from above.

In [None]:
#export
def get_asset_spatial(img_uri):
    """Open an image with rasterio and return its shape and transform.

    Returns a tuple ``(shape, transform)`` where both are plain lists built
    from the dataset's ``shape`` and ``transform`` attributes. The dataset is
    closed before returning.
    """
    with rasterio.open(img_uri) as dataset:
        shape = list(dataset.shape)
        transform = list(dataset.transform)
    return shape, transform

In [None]:
#export
def s2_add_assets2item(item, item_obj_paths):
    """Attach one COG Asset per entry of ``S2_BAND_REFS`` to ``item``.

    For each band reference the matching object path is located, turned into
    a URI and opened to read its shape and transform; these, together with
    the band definition from ``S2_BANDS``, are written onto the Asset via the
    Item's projection and eo extensions. The Asset is stored under the band
    name.

    The Item is modified in place and also returned. Any error raised by
    ``s2_find_band_path`` (e.g. no path for a band) propagates to the caller.
    """
    for b_ref, band_name in S2_BAND_REFS.items():
        # first band definition whose name matches this reference
        band = [candidate for candidate in S2_BANDS if candidate.name == band_name][0]

        href = create_uri(s2_find_band_path(b_ref, item_obj_paths))
        asset = pystac.Asset(href=href, media_type=pystac.MediaType.COG)

        shape, transform = get_asset_spatial(href)

        # per-asset projection and band metadata (written in this order)
        item.ext.projection.set_transform(transform, asset)
        item.ext.projection.set_shape(shape, asset)
        item.ext.eo.set_bands([band], asset)

        item.add_asset(band.name, asset)

    return item

In [None]:
# NOTE: s2_add_assets2item modifies the Item in place and returns it, so
# `example_item` and `example_item_with_assets` are the same object after this cell
example_item_with_assets = s2_add_assets2item(example_item, item_obj_paths)

Now we can see the completed *Item* stac record.

In [None]:
# validate again now that the assets have been added
example_item_with_assets.validate()

In [None]:
# inspect the completed Item, including its assets, as a plain dict
example_item_with_assets.to_dict()

{'type': 'Feature',
 'stac_version': '1.0.0-beta.2',
 'id': 'S2A_MSIL2A_20151002T222056_T01KAU',
 'properties': {'proj:epsg': 32701,
  'eo:cloud_cover': 0.0,
  'gsd': 10,
  'platform': 'sentinel-2',
  'instruments': ['msi'],
  'datetime': '2015-10-02T22:20:56Z'},
 'geometry': {'type': 'Polygon',
  'coordinates': [[[179.17926444633878, -19.943650180308413],
    [179.20240407672088, -18.95356468137259],
    [-179.75632222592535, -18.971998100113254],
    [-179.77314066061123, -19.963124106519743],
    [179.17926444633878, -19.943650180308413]]]},
 'links': [],
 'assets': {'aot': {'href': 'http://s3-uk-1.sa-catapult.co.uk/public-eo-data/common_sensing/fiji/sentinel_2/S2A_MSIL2A_20151002T222056_T01KAU/S2A_MSIL2A_20151002T222056_T01KAU_AOT_10m.tif',
   'type': 'image/tiff; application=geotiff; profile=cloud-optimized',
   'proj:transform': [5490, 5490],
   'proj:shape': [20.0, 0.0, 99960.0, 0.0, -20.0, 7900000.0, 0.0, 0.0, 1.0],
   'eo:bands': [{'name': 'aot', 'common_name': 'aot', 'descrip

### **Compiling *Items* into a *Collection*** 

We naturally want to apply the above tools to all related *Items* and their *Assets* in order to build a *Collection* that can sit within another *Collection* or an overall *Catalog* covering various geospatial datasets.

In [None]:
#export
def s2_create_collection(s2_dir, bucket='public-eo-data', n=None):
    """Build a pystac Collection of Sentinel-2 Items found under a prefix.

    Parameters
    ----------
    s2_dir : str
        Prefix passed to ``s3_list_objects_paths`` together with ``bucket``.
    bucket : str
        Bucket to list; defaults to ``'public-eo-data'``.
    n : int or None
        Upper bound on the number of item names processed (slice
        ``item_names[:n]``); ``None`` processes all of them.

    Returns
    -------
    pystac.Collection
        Collection with providers and the product_definition extension set,
        one Item (with assets) per processed scene, and its extent updated
        from those Items.
    """
    description = '''### Sentinel-2 Datasets

    A collection of Sentinel-2 datasets. Obtained from Copernicus Hub and Google Cloud and preparred by Catapult.
    '''

    # placeholder extent - overwritten by update_extent_from_items() below
    placeholder_extent = pystac.Extent(
        pystac.SpatialExtent([[-7.57216793459, 49.959999905, 1.68153079591, 58.6350001085]]),
        pystac.TemporalExtent([[datetime(2011, 12, 16), None]]),
    )

    collection = pystac.Collection(
        id='sentinel-2',
        title='Copernicus Sentinel-2 datasets',
        description=description,
        extent=placeholder_extent,
        properties={},
    )

    # one Provider entry per (organisation, role) pair
    provider_specs = (
        ('European Space Agency', 'producer', 'https://www.esa.int/'),
        ('European Space Agency', 'licensor', 'https://www.esa.int/'),
        ('Satellite Applications Catapult', 'processor', 'https://sa.catapult.org.uk/'),
        ('Satellite Applications Catapult', 'host', 'https://sa.catapult.org.uk/'),
    )
    collection.providers = [
        pystac.Provider(name=provider_name, roles=[role], url=provider_url)
        for provider_name, role, provider_url in provider_specs
    ]

    # product definition (custom extension) built from the hardcoded attributes
    collection.ext.enable('product_definition')
    collection.ext.product_definition.metadata_type = "eo3"
    collection.ext.product_definition.metadata = S2_METADATA
    collection.ext.product_definition.measurements = S2_MEASUREMENTS

    # scene names are the directory that directly contains each listed object
    all_paths = s3_list_objects_paths(bucket, s2_dir)
    scene_names = list(np.unique([path.split('/')[-2] for path in all_paths]))

    for scene_name in scene_names[:n]:
        # every object whose path contains the scene name belongs to that Item
        scene_paths = [path for path in all_paths if scene_name in path]
        scene_item = s2_create_item(scene_name, scene_paths)
        scene_item = s2_add_assets2item(scene_item, scene_paths)
        collection.add_item(scene_item)

    collection.update_extent_from_items()

    return collection
    

In [None]:
# n=10 limits the build to the first ten item names under the prefix
example_collection = s2_create_collection('common_sensing/fiji/sentinel_2/', n=10)

S2A_MSIL2A_20151002T222056_T01KAU
S2A_MSIL2A_20151002T222056_T01KBU
S2A_MSIL2A_20151022T222102_T01KAU
S2A_MSIL2A_20151022T222102_T01KBU
S2A_MSIL2A_20151114T223002_T60KWD
S2A_MSIL2A_20151114T223002_T60KWE
S2A_MSIL2A_20151114T223002_T60KWF
S2A_MSIL2A_20151114T223002_T60KWG
S2A_MSIL2A_20151114T223002_T60KXD
S2A_MSIL2A_20151114T223002_T60KXE


In [None]:
# print the Collection together with the Items it contains
example_collection.describe()

* <Collection id=sentinel-2>
  * <Item id=S2A_MSIL2A_20151002T222056_T01KAU>
  * <Item id=S2A_MSIL2A_20151002T222056_T01KBU>
  * <Item id=S2A_MSIL2A_20151022T222102_T01KAU>
  * <Item id=S2A_MSIL2A_20151022T222102_T01KBU>
  * <Item id=S2A_MSIL2A_20151114T223002_T60KWD>
  * <Item id=S2A_MSIL2A_20151114T223002_T60KWE>
  * <Item id=S2A_MSIL2A_20151114T223002_T60KWF>
  * <Item id=S2A_MSIL2A_20151114T223002_T60KWG>
  * <Item id=S2A_MSIL2A_20151114T223002_T60KXD>
  * <Item id=S2A_MSIL2A_20151114T223002_T60KXE>


In [None]:
# inspect the Collection record as a plain dict
example_collection.to_dict()

{'id': 'sentinel-2',
 'stac_version': '1.0.0-beta.2',
 'description': '### Sentinel-2 Datasets\n\n    A collection of Sentinel-2 datasets. Obtained from Copernicus Hub and Google Cloud and preparred by Catapult.\n    ',
 'links': [{'rel': 'root', 'href': None, 'type': 'application/json'},
  {'rel': 'item', 'href': None, 'type': 'application/json'},
  {'rel': 'item', 'href': None, 'type': 'application/json'},
  {'rel': 'item', 'href': None, 'type': 'application/json'},
  {'rel': 'item', 'href': None, 'type': 'application/json'},
  {'rel': 'item', 'href': None, 'type': 'application/json'},
  {'rel': 'item', 'href': None, 'type': 'application/json'},
  {'rel': 'item', 'href': None, 'type': 'application/json'},
  {'rel': 'item', 'href': None, 'type': 'application/json'},
  {'rel': 'item', 'href': None, 'type': 'application/json'},
  {'rel': 'item', 'href': None, 'type': 'application/json'}],
 'stac_extensions': ['product_definition'],
 'title': 'Copernicus Sentinel-2 datasets',
 'extent': 

In [None]:
#hide
# nbdev: convert the project's notebooks to library modules (see the "Converted ..." output)
from nbdev.export import notebook2script; notebook2script()

Converted 00_extensions.ipynb.
Converted 00_rediswq.ipynb.
Converted 00_utils.ipynb.
Converted 01A_pleiades.ipynb.
Converted 01B_pleiades_prep_worker.ipynb.
Converted 02A_spot.ipynb.
Converted 03A_novasar.ipynb.
Converted 04A_cs_sentinel2.ipynb.
Converted 09_cs_stact_catalog.ipynb.
Converted 09_sac_stac_catalog.ipynb.
Converted 10_load_stac.ipynb.
Converted index.ipynb.
