In [1]:
#default_exp novasar

# NovaSAR

> Working through stac item metadata parsing, etc. for NovaSAR datasets (data already prepped to COG).

In [2]:
#hide
# %load_ext autoreload
# %autoreload 2

In [3]:
# from nbdev.showdoc import *

In [4]:
import os
import boto3

import numpy as np
from datetime import datetime
import json

import xmltodict
import pystac
from pystac import STAC_IO
from pystac.extensions.eo import Band
from pystac.extensions.sar import FrequencyBand, Polarization, ObservationDirection
import rasterio
from shapely.geometry import Polygon

import ipynb

from ipynb.fs.defs.utils import *

## **STAC metadata**: ***core & extensions***

With the COGs hosted on object storage, we can create some tools for building a STAC *Collection* from them, to be used within **insert nb** to build a *Catalog* of different Catapult *Collections*. As per that nb, these initially sit within a *static* STAC alongside the actual datasets on the object storage. However, we plan on hosting via a STAC-compliant API - probably [pygeoapi](https://pygeoapi.io/). First, define the bucket and imagery directory.

In [53]:
s3_bucket = 'public-eo-data'
img_dir = 'novasar_uk_test/'

obj_paths_list = s3_list_objects_paths(s3_bucket, img_dir)

In [54]:
# get unique Item / scene names (third dir from path)

# keep only objects stored under one of the two known instrument-mode directories
scene_names = list(np.unique([
    path.split('/')[2]
    for path in obj_paths_list
    if any(mode in path for mode in ('20m-ScanSAR', '6m-Stripmap'))
]))

scene_names[:5]

['NovaSAR_01_10135_slc_11_200215_094557_HH_1_ML_TC_TF_cog',
 'NovaSAR_01_10135_slc_11_200215_094601_HH_2_ML_TC_TF_cog',
 'NovaSAR_01_10387_slc_11_200307_101114_HH_1_ML_TC_TF_cog',
 'NovaSAR_01_10387_slc_11_200307_101118_HH_2_ML_TC_TF_cog',
 'NovaSAR_01_10387_slc_11_200307_101122_HH_3_ML_TC_TF_cog']

### **Individual example for iteration**

Can just work with one

In [7]:
scene_name = scene_names[0]
scene_name

'NovaSAR_01_10135_slc_11_200215_094557_HH_1_ML_TC_TF_cog'

and objects associated with that scene

In [8]:
# every object whose path mentions the chosen scene name
scene_obj_paths = [path for path in obj_paths_list if scene_name in path]
scene_obj_paths

['novasar_uk_test/6m-Stripmap/NovaSAR_01_10135_slc_11_200215_094557_HH_1_ML_TC_TF_cog/NovaSAR_01_10135_slc_11_200215_094557_HH_1_Gamma0_Intensity_HH_db.tif',
 'novasar_uk_test/6m-Stripmap/NovaSAR_01_10135_slc_11_200215_094557_HH_1_ML_TC_TF_cog/original_metadata.xml',
 'novasar_uk_test/6m-Stripmap/NovaSAR_01_10135_slc_11_200215_094557_HH_1_ML_TC_TF_cog/process_metadata.dim']

set our own I/O for pystac

In [9]:
#export
#pystac_setIO()

### **Functions for** ***Item*** **metadata**

In [10]:
#export
def novasar_get_dt(scene_name):
    """
    Acquisition datetime parsed from a NovaSAR scene name.

    The 6th and 7th underscore-separated fields of the name are read as
    date (yymmdd) and time (HHMMSS), e.g. '..._200215_094557_...'.
    Returns a naive datetime (no tzinfo attached).
    """
    fields = scene_name.split('_')
    stamp = '_'.join(fields[5:7])
    return datetime.strptime(stamp, '%y%m%d_%H%M%S')

In [11]:
novasar_get_dt(scene_name)

datetime.datetime(2020, 2, 15, 9, 45, 57)

In [55]:
#export
def novasar_parsemeta(scene_name, scene_obj_paths):
    """
    Parse the scene's '.dim' metadata object into a dict.

    The first path in scene_obj_paths ending in '.dim' is turned into a
    URI with create_uri, read via pystac.STAC_IO.read_text and parsed
    with xmltodict. scene_name is accepted but not used.
    Raises IndexError if no '.dim' path is present.
    """
    dim_paths = [path for path in scene_obj_paths if path.endswith('.dim')]
    dim_text = pystac.STAC_IO.read_text(create_uri(dim_paths[0]))
    return xmltodict.parse(dim_text)

# TODO: link to SeDAS API

In [56]:
# meta = novasar_parsemeta(scene_name, scene_obj_paths)

# orbit_data = meta['Dimap_Document']['Dataset_Sources']['MDElem']['MDElem'][1]['MDElem']['MDElem'][2]['MDATTR'][0]['#text'] #this seems daft

In [14]:
def novasar_get_crs_and_bbox(raster_uri):
    """
    Bounding box, footprint geometry and CRS of a raster.

    Opens raster_uri with rasterio and returns a 3-tuple:
    bbox as [left, bottom, right, top], a GeoJSON-style Polygon dict,
    and the dataset's rasterio crs.
    nb: footprint is currently just the bbox rectangle.
    """
    with rasterio.open(raster_uri) as src:
        left, bottom, right, top = src.bounds
        crs = src.crs

    # closed ring: first and last vertex are the same corner
    ring = [
        [left, bottom],
        [left, top],
        [right, top],
        [right, bottom],
        [left, bottom],
    ]
    return [left, bottom, right, top], {'type': 'Polygon', 'coordinates': [ring]}, crs

In [15]:
def novasar_get_instrument_mode(scene_obj_paths):
    """
    Instrument mode, taken from the second '/'-separated component of the
    first object path (any of the scene's paths would do).
    """
    first_path = scene_obj_paths[0]
    path_parts = first_path.split('/')
    return path_parts[1]

In [16]:
novasar_get_instrument_mode(scene_obj_paths)

'6m-Stripmap'

In [17]:
bbox, g, epsg = novasar_get_crs_and_bbox(create_uri([i for i in scene_obj_paths if i.endswith('.tif')][0]))
bbox, g, epsg.to_dict()['init'][5:]

([-1.0478259576205007,
  50.397680132459946,
  -0.6671061061552759,
  50.6936401639443],
 {'type': 'Polygon',
  'coordinates': [[[-1.0478259576205007, 50.397680132459946],
    [-1.0478259576205007, 50.6936401639443],
    [-0.6671061061552759, 50.6936401639443],
    [-0.6671061061552759, 50.397680132459946],
    [-1.0478259576205007, 50.397680132459946]]]},
 '4326')

In [18]:
#export
def novasar_create_item(scene_name, scene_obj_paths):
    """
    Build a pystac Item (without assets) for one NovaSAR scene.

    scene_name is used as the Item id and parsed for the acquisition
    datetime; scene_obj_paths are the scene's object paths, of which the
    first '.tif' supplies bbox, footprint and CRS.
    The 'projection' and 'sar' extensions are enabled and proj:epsg,
    sar:instrument_mode and sar:frequency_band ('S') are set.
    Raises IndexError if the scene has no '.tif' object.
    """
    cog_uri = create_uri([i for i in scene_obj_paths if i.endswith('.tif')][0])
    bbox, geometry, crs = novasar_get_crs_and_bbox(cog_uri)

    item = pystac.Item(id=scene_name,
                       datetime=novasar_get_dt(scene_name),
                       geometry=geometry,
                       bbox=bbox,
                       properties={})

    # TODO: add func for res of novasar and set item.common_metadata.gsd

    item.ext.enable('projection')
    # Ask rasterio for the EPSG code rather than slicing to_dict()['init']:
    # the 'init' key is only present when the CRS is expressed as a PROJ
    # init string. to_epsg() returns None when no EPSG code matches.
    item.ext.projection.epsg = crs.to_epsg() if crs is not None else None

    item.ext.enable('sar')
    item.ext.sar.instrument_mode = novasar_get_instrument_mode(scene_obj_paths)
    item.ext.sar.frequency_band = FrequencyBand('S')

    # item.ext.sar.orbit = orbit_data (no such extension)

    return item

In [19]:
item = novasar_create_item(scene_name, scene_obj_paths)

In [20]:
item.to_dict()

{'type': 'Feature',
 'stac_version': '1.0.0-beta.2',
 'id': 'NovaSAR_01_10135_slc_11_200215_094557_HH_1_ML_TC_TF_cog',
 'properties': {'proj:epsg': 4326,
  'sar:instrument_mode': '6m-Stripmap',
  'sar:frequency_band': 'S',
  'datetime': '2020-02-15T09:45:57Z'},
 'geometry': {'type': 'Polygon',
  'coordinates': [[[-1.0478259576205007, 50.397680132459946],
    [-1.0478259576205007, 50.6936401639443],
    [-0.6671061061552759, 50.6936401639443],
    [-0.6671061061552759, 50.397680132459946],
    [-1.0478259576205007, 50.397680132459946]]]},
 'links': [],
 'assets': {},
 'bbox': [-1.0478259576205007,
  50.397680132459946,
  -0.6671061061552759,
  50.6936401639443],
 'stac_extensions': ['projection', 'sar']}

In [21]:
# item.validate() # won't work as need polarisation (added below)

### **Functions for** ***Asset*** **metadata**

Once we have *Item* level metadata we can add the actual *Assets*. There are a few constants used within these functions at the *Item* level (i.e. band info) and criteria for finding within the object paths.

In [22]:
# first, get href(s) for asset(s) in scene

# cog_hrefs = list(map(create_uri, [i for i in obj_paths_list if scene_name in i if i.endswith('.tif')]))

cog_href = create_uri([i for i in scene_obj_paths if i.endswith('.tif')][0])

cog_href

'http://s3-uk-1.sa-catapult.co.uk/public-eo-data/novasar_uk_test/6m-Stripmap/NovaSAR_01_10135_slc_11_200215_094557_HH_1_ML_TC_TF_cog/NovaSAR_01_10135_slc_11_200215_094557_HH_1_Gamma0_Intensity_HH_db.tif'

In [23]:
def novasar_get_pol(asset_path):
    """
    Polarization of an asset, read from its file name.

    Looks for a '_VV_', '_HH_', '_HV_' or '_VH_' token in the basename of
    asset_path and returns the matching pystac Polarization.
    Raises ValueError if none of the tokens is present.
    """
    asset_name = os.path.basename(asset_path)
    # VV then HH keeps the original precedence; the cross-pol codes are
    # only tried afterwards.
    for code in ('VV', 'HH', 'HV', 'VH'):
        if f'_{code}_' in asset_name:
            return Polarization(code)
    raise ValueError(f'no polarisation token (_VV_, _HH_, _HV_, _VH_) in asset name: {asset_name!r}')

In [24]:
novasar_get_pol(cog_href)

<Polarization.HH: 'HH'>

In [25]:
def novasar_get_prod_type(asset_path): #TODO: generalise for multiple assets ??
    """
    Product type of an asset, read from its file name.

    Returns 'gamma0_db' when the basename of asset_path contains 'Gamma0'
    (the only product handled so far).
    Raises ValueError for any other name.
    """
    asset_name = os.path.basename(asset_path)
    if 'Gamma0' in asset_name:
        return 'gamma0_db'
    raise ValueError(f'unsupported product type for asset: {asset_name!r}')

In [26]:
novasar_get_prod_type(cog_href)

'gamma0_db'

In [27]:
# item.ext.sar.observation_direction

In [28]:
def add_asset_novasar(item, asset_path):
    """
    Attach the COG at asset_path to item under the key 'cog' and set the
    SAR polarizations and product type derived from the asset's name.
    Mutates item and also returns it.
    """
    cog_asset = pystac.Asset(href=asset_path, media_type=pystac.MediaType.COG)
    item.add_asset('cog', cog_asset)

    asset_name = os.path.basename(asset_path)
    item.ext.sar.polarizations = [Polarization(novasar_get_pol(asset_path))]
    item.ext.sar.product_type = novasar_get_prod_type(asset_name)

    return item

In [29]:
add_asset_novasar(item, cog_href)

<Item id=NovaSAR_01_10135_slc_11_200215_094557_HH_1_ML_TC_TF_cog>

In [30]:
item.validate()

In [31]:
item.to_dict()

{'type': 'Feature',
 'stac_version': '1.0.0-beta.2',
 'id': 'NovaSAR_01_10135_slc_11_200215_094557_HH_1_ML_TC_TF_cog',
 'properties': {'proj:epsg': 4326,
  'sar:instrument_mode': '6m-Stripmap',
  'sar:frequency_band': 'S',
  'datetime': '2020-02-15T09:45:57Z',
  'sar:polarizations': ['HH'],
  'sar:product_type': 'gamma0_db'},
 'geometry': {'type': 'Polygon',
  'coordinates': [[[-1.0478259576205007, 50.397680132459946],
    [-1.0478259576205007, 50.6936401639443],
    [-0.6671061061552759, 50.6936401639443],
    [-0.6671061061552759, 50.397680132459946],
    [-1.0478259576205007, 50.397680132459946]]]},
 'links': [],
 'assets': {'cog': {'href': 'http://s3-uk-1.sa-catapult.co.uk/public-eo-data/novasar_uk_test/6m-Stripmap/NovaSAR_01_10135_slc_11_200215_094557_HH_1_ML_TC_TF_cog/NovaSAR_01_10135_slc_11_200215_094557_HH_1_Gamma0_Intensity_HH_db.tif',
   'type': <MediaType.COG: 'image/tiff; application=geotiff; profile=cloud-optimized'>}},
 'bbox': [-1.0478259576205007,
  50.397680132459946,


### Make Collection

Now gather 6m and 20m resolution assets together into collections

In [32]:
[i for i in obj_paths_list if '_Gamma0_' in i][0]

'novasar_uk_test/20m-ScanSAR/NovaSAR_01_11849_scd_20_200524_094209_HH_1_ML_TC_TF_cog/NovaSAR_01_11849_scd_20_200524_094209_HH_1_Gamma0_intensity_HH_db.tif'

In [33]:
def create_novasar_collection(novasar_dir, bucket=s3_bucket):
    """
    Build a pystac Collection from every scene found under novasar_dir.

    novasar_dir is an object-storage prefix such as
    'novasar_uk_test/6m-Stripmap'; its last path component is used in the
    collection id ('novasar_<component>'). bucket is the bucket to list.
    Each scene (third path component of an object path) becomes an Item
    carrying its '_Gamma0_' COG as the single asset, and the collection
    extent is recomputed from the Items before returning.
    Raises IndexError if a scene has no '.tif' or no '_Gamma0_' object.
    """
    # last component of the prefix, tolerant of a trailing '/'
    collection_id = f"novasar_{os.path.basename(novasar_dir.rstrip('/'))}"
    collection_title = 'NovaSAR test'
    collection_description = '''### NovaSAR test STACs

    A collection of NovaSAR COGs stored in Catapult object storage
    '''

    # placeholder extent (whole globe), replaced by update_extent_from_items below
    spatial_extent = pystac.SpatialExtent([[-180, -90, 180, 90]])
    temporal_extent = pystac.TemporalExtent([[datetime(2011, 12, 16), None]])
    collection_extent = pystac.Extent(spatial_extent, temporal_extent)

    collection = pystac.Collection(id=collection_id,
                                   title=collection_title,
                                   description=collection_description,
                                   extent=collection_extent,
                                   properties={})

    collection.providers = [
       # pystac.Provider(name='European Space Agency', roles=['producer'], url='https://www.esa.int/'),
       # pystac.Provider(name='European Space Agency', roles=['licensor'], url='https://www.esa.int/'),
        pystac.Provider(name='Satellite Applications Catapult', roles=['processor'], url='https://sa.catapult.org.uk/'),
        pystac.Provider(name='Satellite Applications Catapult', roles=['host'], url='https://sa.catapult.org.uk/')
    ]

    obj_paths_list = s3_list_objects_paths(bucket, novasar_dir)
    scene_names = list(np.unique([i.split('/')[2] for i in obj_paths_list]))

    for scene_name in scene_names:

        print(scene_name)

        # match on the scene path component itself, so a scene whose name is
        # a substring of another scene's name does not pick up its objects
        scene_obj_paths = [i for i in obj_paths_list if i.split('/')[2] == scene_name]

        item = novasar_create_item(scene_name, scene_obj_paths)

        # only works for gamma 0 products (so far)
        gamma0_path = [i for i in scene_obj_paths if '_Gamma0_' in i][0]
        add_asset_novasar(item, create_uri(gamma0_path))

        collection.add_item(item)

    collection.update_extent_from_items()

    return collection

In [34]:
collection_6m = create_novasar_collection('novasar_uk_test/6m-Stripmap')
collection_20m = create_novasar_collection('novasar_uk_test/20m-ScanSAR')

NovaSAR_01_10135_slc_11_200215_094557_HH_1_ML_TC_TF_cog
NovaSAR_01_10135_slc_11_200215_094601_HH_2_ML_TC_TF_cog
NovaSAR_01_10387_slc_11_200307_101114_HH_1_ML_TC_TF_cog
NovaSAR_01_10387_slc_11_200307_101118_HH_2_ML_TC_TF_cog
NovaSAR_01_10387_slc_11_200307_101122_HH_3_ML_TC_TF_cog
NovaSAR_01_10387_slc_11_200307_101127_HH_4_ML_TC_TF_cog
NovaSAR_01_11109_slc_11_200410_232727_HH_1_ML_TC_TF_cog
NovaSAR_01_11109_slc_11_200410_232732_HH_2_ML_TC_TF_cog
NovaSAR_01_11109_slc_11_200410_232737_HH_3_ML_TC_TF_cog
NovaSAR_01_11183_slc_11_200417_000118_HH_1_ML_TC_TF_cog
NovaSAR_01_11183_slc_11_200417_000122_HH_2_ML_TC_TF_cog
NovaSAR_01_11454_slc_11_200506_224624_HH_1_ML_TC_TF_cog
NovaSAR_01_11454_slc_11_200506_224629_HH_2_ML_TC_TF_cog
NovaSAR_01_11454_slc_11_200506_224633_HH_3_ML_TC_TF_cog
NovaSAR_01_12184_slc_11_200615_101308_HH_ML_TC_TF_cog
NovaSAR_01_12211_slc_11_200616_101852_HH_1_ML_TC_TF_cog
NovaSAR_01_12211_slc_11_200616_101857_HH_2_ML_TC_TF_cog
NovaSAR_01_12478_slc_11_200626_225229_HH_1_ML_TC_T

In [35]:
next(collection_6m.get_items()).to_dict()

{'type': 'Feature',
 'stac_version': '1.0.0-beta.2',
 'id': 'NovaSAR_01_10135_slc_11_200215_094557_HH_1_ML_TC_TF_cog',
 'properties': {'proj:epsg': 4326,
  'sar:instrument_mode': '6m-Stripmap',
  'sar:frequency_band': 'S',
  'sar:polarizations': ['HH'],
  'sar:product_type': 'gamma0_db',
  'datetime': '2020-02-15T09:45:57Z'},
 'geometry': {'type': 'Polygon',
  'coordinates': [[[-1.0478259576205007, 50.397680132459946],
    [-1.0478259576205007, 50.6936401639443],
    [-0.6671061061552759, 50.6936401639443],
    [-0.6671061061552759, 50.397680132459946],
    [-1.0478259576205007, 50.397680132459946]]]},
 'links': [{'rel': 'root', 'href': None, 'type': 'application/json'},
  {'rel': 'collection', 'href': None, 'type': 'application/json'},
  {'rel': 'parent', 'href': None, 'type': 'application/json'}],
 'assets': {'cog': {'href': 'http://s3-uk-1.sa-catapult.co.uk/public-eo-data/novasar_uk_test/6m-Stripmap/NovaSAR_01_10135_slc_11_200215_094557_HH_1_ML_TC_TF_cog/NovaSAR_01_10135_slc_11_2002

In [36]:
# collection_20m.validate()

In [37]:
def upload_to_s3(body: bytes, key: str, bucket: str = 'public-eo-data'):
    """
    Write body to object storage at key.

    body is the object content; a str is encoded as UTF-8 first, since
    callers in this notebook pass json.dumps output.
    key is the object key inside the bucket.
    bucket is the target bucket name (defaults to 'public-eo-data').
    """
    if isinstance(body, str):
        body = body.encode('utf-8')

    # second value returned by s3_create_client is not needed here
    s3_client, _ = s3_create_client(s3_bucket=bucket)

    s3_client.put_object(Bucket=bucket,
                         Key=key,
                         Body=body)

# res = s3_list_objects(s3_bucket='public-eo-data', prefix='/stac_catalogs/sac_stac/')

# upload_to_s3(body=json.dumps(collection.to_dict(), indent = 2), 
            # key='stac_catalogs/novasar_test/collection.json')

# collection.to_dict()

## Overall Catalog

Create catalog for 6m and 20m collections

In [38]:
#export
def novasar_create_catalog(collections):
    """
    Wrap the given collections in one parent pystac Catalog.

    The catalog has the fixed id 'novasar-overall' and declares the 'eo'
    and 'projection' extensions; collections are added as its children.
    Returns the new Catalog.
    """
    description = '''### NovaSAR test STACs

    A collection of NovaSAR COGs stored in Catapult object storage
    '''

    catalog = pystac.Catalog(
        id='novasar-overall',
        description=description,
        title='Catapult-hosted Novasar',
        stac_extensions=['eo', 'projection'],
    )
    catalog.add_children(collections)

    return catalog

In [39]:
catalog=novasar_create_catalog([collection_6m, collection_20m])

In [40]:
next(next(catalog.get_children()).get_items()).to_dict()

{'type': 'Feature',
 'stac_version': '1.0.0-beta.2',
 'id': 'NovaSAR_01_10135_slc_11_200215_094557_HH_1_ML_TC_TF_cog',
 'properties': {'proj:epsg': 4326,
  'sar:instrument_mode': '6m-Stripmap',
  'sar:frequency_band': 'S',
  'sar:polarizations': ['HH'],
  'sar:product_type': 'gamma0_db',
  'datetime': '2020-02-15T09:45:57Z'},
 'geometry': {'type': 'Polygon',
  'coordinates': [[[-1.0478259576205007, 50.397680132459946],
    [-1.0478259576205007, 50.6936401639443],
    [-0.6671061061552759, 50.6936401639443],
    [-0.6671061061552759, 50.397680132459946],
    [-1.0478259576205007, 50.397680132459946]]]},
 'links': [{'rel': 'root', 'href': None, 'type': 'application/json'},
  {'rel': 'collection', 'href': None, 'type': 'application/json'},
  {'rel': 'parent', 'href': None, 'type': 'application/json'}],
 'assets': {'cog': {'href': 'http://s3-uk-1.sa-catapult.co.uk/public-eo-data/novasar_uk_test/6m-Stripmap/NovaSAR_01_10135_slc_11_200215_094557_HH_1_ML_TC_TF_cog/NovaSAR_01_10135_slc_11_2002

In [48]:
catalog.normalize_hrefs(create_uri('stac_catalogs/novasar_test'))

catalog.to_dict()

{'id': 'novasar-overall',
 'stac_version': '1.0.0-beta.2',
 'description': '### NovaSAR test STACs\n\n    A collection of NovaSAR COGs stored in Catapult object storage\n    ',
 'links': [{'rel': 'root',
   'href': 'http://s3-uk-1.sa-catapult.co.uk/public-eo-data/stac_catalogs/novasar_test/catalog.json',
   'type': 'application/json'},
  {'rel': 'child',
   'href': 'http://s3-uk-1.sa-catapult.co.uk/public-eo-data/stac_catalogs/novasar_test/novasar_6m-Stripmap/collection.json',
   'type': 'application/json'},
  {'rel': 'child',
   'href': 'http://s3-uk-1.sa-catapult.co.uk/public-eo-data/stac_catalogs/novasar_test/novasar_20m-ScanSAR/collection.json',
   'type': 'application/json'},
  {'rel': 'self',
   'href': 'http://s3-uk-1.sa-catapult.co.uk/public-eo-data/stac_catalogs/novasar_test/catalog.json',
   'type': 'application/json'}],
 'stac_extensions': ['eo', 'projection'],
 'title': 'Catapult-hosted Novasar'}

In [49]:
catalog.get_self_href().split('/', 4)[4]

'stac_catalogs/novasar_test/catalog.json'

In [50]:
def save_cat_to_s3(catalog):
    """
    Upload a catalog, its child collections and their items as JSON.

    Each object is serialised with to_dict() and written under the key
    taken from its own self href, so hrefs must already be set (e.g. via
    normalize_hrefs) before calling this.
    """
    def _upload(stac_obj):
        # the key is whatever follows the fourth '/' of the self href,
        # i.e. the part after 'scheme://host/bucket/'
        upload_to_s3(body=json.dumps(stac_obj.to_dict(), indent=2),
                     key=stac_obj.get_self_href().split('/', 4)[4])

    _upload(catalog)

    for collection in catalog.get_children():
        # write the collection's own JSON (not the catalog's) to its key
        _upload(collection)

        for item in collection.get_items():
            _upload(item)

In [51]:
save_cat_to_s3(catalog)

In [52]:
catalog.validate_all()

### SeDAS 

In [46]:
from getpass import getpass
from urllib.error import HTTPError

from sedas_pyapi.sedas_api import SeDASAPI


if __name__ == '__main__':

    # Creating the SeDASAPI object attempts to log into live and throws an
    # exception if it can't, so we feed it real creds (anyone using test
    # should have a live set). The password is read from the SEDAS_PWD
    # environment variable; the constructor raises ValueError when the
    # username or password is blank.
    _username = input("Please enter your live username:")
    __password = os.getenv('SEDAS_PWD')

    # Note the SeDASBulkDownload is very chatty at debug. But if you need to know what is going on enable logging.
    # import logging

    # logging.basicConfig(level=logging.DEBUG)
    # logger.setLevel(logging.DEBUG)

    # create the object; this connects to live first
    sedas = SeDASAPI(_username, __password)

    # set the base url to point at the test instance
    sedas.base_url = "https://geobrowsertest.satapps.org/api/"

    # Now we need to reset a few variables that have the original base url still
    sedas.sensor_url = f"{sedas.base_url}sensors"
    sedas.authentication_url = f"{sedas.base_url}authentication"
    sedas.search_url = f"{sedas.base_url}search"

    # Get rid of the token to force the log in to happen again.
    sedas._token = None

    # now we can get the user's actual test credentials
    sedas._username = input("Please enter your test username:")
    # NOTE(review): this assignment is outside the class body, so no name
    # mangling applies and it sets an attribute literally called
    # '__password'. Confirm that SeDASAPI.login() actually reads it.
    sedas.__password = getpass("Please enter your test password:")
    # and log into test
    sedas.login()

    # now whatever end points we call should go to the test server.
    result_sats = sedas.list_satellites()

    ## do something a bit more interesting with the results.
    print(json.dumps(result_sats, sort_keys=True, indent=4, separators=(',', ': ')))

    satellites = [sat['name'] for sat in result_sats]

    print(f"Available satellites are: {', '.join(satellites)}")


Please enter your test username: 


ValueError: username and password must not be blank

In [None]:
# Only report whether the password is configured; echoing the value would
# store the secret in the notebook's saved output.
os.getenv('SEDAS_PWD') is not None

In [None]:
from sedas_pyapi.sedas_api import SeDASAPI

SeDASAPI(os.getenv('SEDAS_USERNAME'), os.getenv('SEDAS_PWD'))