In [None]:
#default_exp novasar

# NovaSAR

> Working through stac item metadata parsing, etc. for NovaSAR datasets (data already prepped to COG).

In [None]:
#hide
%load_ext autoreload
%autoreload 2

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
#export
import os
from glob import glob
import time
import numpy as np
from datetime import datetime
import json

import xmltodict
import pystac
from pystac import STAC_IO
from pystac.extensions.eo import Band
from pystac.extensions.sar import FrequencyBand, Polarization, ObservationDirection
import geopandas as gpd
import rasterio
from shapely.geometry import Polygon

from sac_stac.utils import s3_list_objects_paths
from sac_stac.utils import pystac_setIO, create_uri

## **STAC metadata**: ***core & extensions***

With the COGs above hosted on object storage, we can create some tools for building a STAC *Collection* from them, to be used within **insert nb** to build a *Catalog* of different Catapult *Collections*. As per that notebook, these initially sit within a *static* STAC alongside the actual datasets on the object storage. However, we plan on hosting via a STAC-compliant API - probably [pygeoapi](https://pygeoapi.io/).

In [None]:
# List the objects under the NovaSAR test prefix of the public bucket
# (presumably returns object key strings such as 'novasar_uk_test/<mode>/<scene>/<file>' - see sac_stac.utils)
obj_paths_list = s3_list_objects_paths('public-eo-data', 'novasar_uk_test/')

In [None]:
# Item / scene names are the third component of each object path;
# np.unique de-duplicates them (and returns them sorted)
scene_names = list(
    np.unique([obj_path.split('/')[2] for obj_path in obj_paths_list])
)
scene_names[:5]

['NovaSAR_01_10135_slc_11_200215_094557_HH_1_ML_TC_TF_cog',
 'NovaSAR_01_10135_slc_11_200215_094601_HH_2_ML_TC_TF_cog',
 'NovaSAR_01_10387_slc_11_200307_101114_HH_1_ML_TC_TF_cog',
 'NovaSAR_01_10387_slc_11_200307_101118_HH_2_ML_TC_TF_cog',
 'NovaSAR_01_10387_slc_11_200307_101122_HH_3_ML_TC_TF_cog']

### **Individual example for iteration**

We can just work with a single scene while iterating.

In [None]:
# Use the first scene as the worked example for the rest of the notebook
scene_name = scene_names[0]
scene_name

'NovaSAR_01_10135_slc_11_200215_094557_HH_1_ML_TC_TF_cog'

and objects associated with that scene

In [None]:
# Keep only the object paths that contain the example scene's name
scene_obj_paths = [obj_path for obj_path in obj_paths_list if scene_name in obj_path]
scene_obj_paths[:2]

['novasar_uk_test/6m-Stripmap/NovaSAR_01_10135_slc_11_200215_094557_HH_1_ML_TC_TF_cog/NovaSAR_01_10135_slc_11_200215_094557_HH_1_Gamma0_Intensity_HH_db.tif',
 'novasar_uk_test/6m-Stripmap/NovaSAR_01_10135_slc_11_200215_094557_HH_1_ML_TC_TF_cog/original_metadata.xml']

set our own I/O for pystac

In [None]:
#export
# Swap pystac's default I/O for the project's own implementation (imported from sac_stac.utils)
pystac_setIO()

### **Functions for** ***Item*** **metadata**

In [None]:
#export
def novasar_get_dt(scene_name):
    """
    Acquisition datetime parsed from a NovaSAR scene name.

    The 6th and 7th underscore-separated fields of the name are read as
    ``yymmdd`` and ``HHMMSS`` respectively, e.g.
    ``NovaSAR_01_10135_slc_11_200215_094557_HH_1_ML_TC_TF_cog`` gives
    ``datetime(2020, 2, 15, 9, 45, 57)``. The returned datetime is naive
    (no tzinfo attached).
    """
    name_fields = scene_name.split('_')
    date_and_time = '_'.join(name_fields[5:7])
    return datetime.strptime(date_and_time, '%y%m%d_%H%M%S')

In [None]:
# Sanity check: parse the acquisition time out of the example scene name
novasar_get_dt(scene_name)

datetime.datetime(2020, 2, 15, 9, 45, 57)

In [None]:
#export
def novasar_parsemeta(scene_name, scene_obj_paths):
    meta_path = [i for i in scene_obj_paths if (i.endswith('.dim'))][0]
    return xmltodict.parse(pystac.STAC_IO.read_text(create_uri(meta_path)))

In [None]:
# Parse the example scene's metadata and list the top-level sections of the document
meta = novasar_parsemeta(scene_name, scene_obj_paths)
meta['Dimap_Document'].keys()

odict_keys(['@name', 'Metadata_Id', 'Dataset_Id', 'Dataset_Use', 'Production', 'Coordinate_Reference_System', 'Geoposition', 'Raster_Dimensions', 'Data_Access', 'Image_Interpretation', 'Dataset_Sources'])

In [None]:
#export
def novasar_get_crs_and_bbox(raster_uri):
    """
    Read the bounding box, footprint and CRS of a raster (COG).

    Parameters
    ----------
    raster_uri : str
        Path or URI of the raster, passed straight to ``rasterio.open``.

    Returns
    -------
    tuple
        ``(bbox, footprint, crs)`` where ``bbox`` is the list
        ``[left, bottom, right, top]``, ``footprint`` is a shapely
        ``Polygon`` and ``crs`` is the dataset's rasterio CRS.

    nb: the footprint is currently the same rectangle as the bbox, and both
    are expressed in the raster's own CRS (no reprojection is done here).
    """
    # Only the header values are needed, so the dataset is closed straight away.
    with rasterio.open(raster_uri) as ds:
        bounds = ds.bounds
        crs = ds.crs

    bbox = [bounds.left, bounds.bottom, bounds.right, bounds.top]
    footprint = Polygon([
        [bounds.left, bounds.bottom],
        [bounds.left, bounds.top],
        [bounds.right, bounds.top],
        [bounds.right, bounds.bottom]
    ])
    return bbox, footprint, crs

In [None]:
# Try it on one hosted COG. Note that `epsg` here holds the CRS object returned by the function.
bbox, g, epsg = novasar_get_crs_and_bbox('https://s3-uk-1.sa-catapult.co.uk/public-eo-data/novasar_uk_test/6m-Stripmap/NovaSAR_01_9629_slc_11_200129_094350_HH_2_ML_TC_TF_cog/NovaSAR_01_9629_slc_11_200129_094350_HH_2_Gamma0_Intensity_HH_db.tif')
# [5:] drops the first five characters of the 'init' entry (presumably the 'epsg:' prefix), leaving the code as a string
bbox, g.to_wkt(), epsg.to_dict()['init'][5:]

([-1.2347811947155378,
  50.54787880721796,
  -0.8301439683052121,
  50.88365450771424],
 'POLYGON ((-1.2347811947155378 50.5478788072179626, -1.2347811947155378 50.8836545077142404, -0.8301439683052121 50.8836545077142404, -0.8301439683052121 50.5478788072179626, -1.2347811947155378 50.5478788072179626))',
 '4326')

In [None]:
#export
def novasar_create_item(scene_name, scene_obj_paths):
    """
    Build a pystac ``Item`` for a single NovaSAR scene.

    Parameters
    ----------
    scene_name : str
        Scene directory name; used as the Item id and parsed for the
        acquisition datetime (see `novasar_get_dt`).
    scene_obj_paths : list of str
        Object paths belonging to the scene. The first one ending in
        ``.tif`` is opened to read the bbox, footprint and CRS.

    Returns
    -------
    pystac.Item
        Item with the ``projection`` and ``sar`` extensions enabled and no
        assets attached yet.

    Raises
    ------
    IndexError
        If `scene_obj_paths` contains no ``.tif`` path.
    """
    # Spatial metadata is read from the first GeoTIFF listed for the scene.
    tif_path = [i for i in scene_obj_paths if i.endswith('.tif')][0]
    bbox, footprint, crs = novasar_get_crs_and_bbox(create_uri(tif_path))

    # NOTE(review): bbox/geometry are taken in the raster's CRS; STAC expects
    # WGS84 lon/lat, so non-EPSG:4326 COGs would need reprojecting - confirm.
    item = pystac.Item(id=scene_name,
                       datetime=novasar_get_dt(scene_name),
                       # STAC geometry must be a GeoJSON object, not a WKT string
                       geometry=footprint.__geo_interface__,
                       bbox=bbox,
                       properties={})

    # TODO: set item.common_metadata.gsd once a NovaSAR resolution helper exists

    item.ext.enable('projection')
    # proj:epsg is an integer code; to_epsg() avoids slicing the 'init' string
    item.ext.projection.epsg = crs.to_epsg()

    item.ext.enable('sar')
    # NOTE(review): instrument mode is hard-coded for the 6m-Stripmap test data
    item.ext.sar.instrument_mode = "6m-Stripmap"
    item.ext.sar.frequency_band = FrequencyBand('S')

    return item

In [None]:
# Build the Item for the example scene (the repr shows its id)
novasar_create_item(scene_name, scene_obj_paths)

<Item id=NovaSAR_01_10135_slc_11_200215_094557_HH_1_ML_TC_TF_cog>

In [None]:
# Same Item serialised to a dict so the individual STAC fields can be inspected
novasar_create_item(scene_name, scene_obj_paths).to_dict()

{'type': 'Feature',
 'stac_version': '1.0.0-beta.2',
 'id': 'NovaSAR_01_10135_slc_11_200215_094557_HH_1_ML_TC_TF_cog',
 'properties': {'proj:epsg': '4326',
  'sar:instrument_mode': '6m-Stripmap',
  'sar:frequency_band': 'S',
  'datetime': '2020-02-15T09:45:57Z'},
 'geometry': 'POLYGON ((-1.0478259576205007 50.3976801324599464, -1.0478259576205007 50.6936401639443019, -0.6671061061552759 50.6936401639443019, -0.6671061061552759 50.3976801324599464, -1.0478259576205007 50.3976801324599464))',
 'links': [],
 'assets': {},
 'bbox': [-1.0478259576205007,
  50.397680132459946,
  -0.6671061061552759,
  50.6936401639443],
 'stac_extensions': ['projection', 'sar']}

### **Functions for** ***Asset*** **metadata**

Once we have *Item*-level metadata we can add the actual *Assets*. There are a few constants used within these functions at the *Item* level (e.g. band info), plus criteria for finding the relevant assets within the object paths.

In [None]:
def novasar_get_pol(asset_path):
    """
    Polarization of an asset, detected from its file name.

    Only the base name of `asset_path` is inspected, looking for a
    ``_<POL>_`` token such as ``_HH_``.

    Parameters
    ----------
    asset_path : str
        Path (or bare file name) of the asset.

    Returns
    -------
    Polarization
        The matching pystac SAR ``Polarization`` member.

    Raises
    ------
    ValueError
        If the file name contains no recognised polarization token.
    """
    asset_name = os.path.basename(asset_path)
    # VV and HH come first to keep the original precedence; HV/VH are additions.
    for code in ('VV', 'HH', 'HV', 'VH'):
        if '_{}_'.format(code) in asset_name:
            return Polarization(code)
    raise ValueError(
        "Could not determine polarization from asset name: {!r}".format(asset_name)
    )

In [None]:
# Check polarization detection on the file name of the example scene's first GeoTIFF
novasar_get_pol([os.path.basename(i) for i in scene_obj_paths if i.endswith('.tif')][0])

<Polarization.HH: 'HH'>

In [None]:
def novasar_get_prod_type(asset_path):
    """
    Product type of an asset, detected from its path.

    Parameters
    ----------
    asset_path : str
        Path (or bare file name) of the asset.

    Returns
    -------
    str
        ``'gamma0_db'`` when the path contains ``Gamma0``.

    Raises
    ------
    ValueError
        If the path matches no known product type.
    """
    if 'Gamma0' in asset_path:
        return 'gamma0_db'
    raise ValueError(
        "Could not determine product type from asset path: {!r}".format(asset_path)
    )

In [None]:
# Check product-type detection on the file name of the example scene's first GeoTIFF
novasar_get_prod_type([os.path.basename(i) for i in scene_obj_paths if i.endswith('.tif')][0])

'gamma0_db'

In [None]:
# WIP: asset-derived SAR fields for the example scene (not yet part of novasar_create_item)
item = novasar_create_item(scene_name, scene_obj_paths)
tif_path = [i for i in scene_obj_paths if i.endswith('.tif')][0]
item.ext.sar.polarizations = [novasar_get_pol(tif_path)]
item.ext.sar.product_type = novasar_get_prod_type(tif_path)
# TODO: item.ext.sar.observation_direction