# Generate example static catalog for S1 RTC on AWS

Data source: Sentinel-1 RTC on the AWS Open Data registry, https://registry.opendata.aws/sentinel-1-rtc-indigo/

| Location | MGRS |
| - | - | 
| West coast (WA state) | 10UCU |
| US-Canada border | 10UFV |
| Central UTM boundary 12/13 (Colorado) | 12SYJ, 13SBD | 
| Gulf coast | 17RMJ, 15RUN |
| East coast | 19TCH |

The generated static catalog uses the following nested structure:
```
.
├── <MGRS>
│   └── <YEAR>
│       ├── S1A_20170102_19TCH_ASC.json
│       ├── S1A_20170107_19TCH_ASC.json
│       └── S1A_20170114_19TCH_ASC.json
└── catalog.json
```

In [1]:
import s3fs

import pystac
from pystac import Catalog, Collection, CatalogType
from pystac.layout import TemplateLayoutStrategy
from stactools.sentinel1.stac import create_item, create_collection


%load_ext autoreload
%autoreload 2

In [2]:
# Find earliest date -> 20160729
#s3 = s3fs.S3FileSystem(anon=True)
#files = s3.glob('s3://sentinel-s1-rtc-indigo/tiles/RTC/1/IW/*/*/*/2016/*/')
#dates = [x[-18:-10] for x in files]
#min(dates) 20160729

In [3]:
# install development version of stactools (after git clone https://github.com/scottyhq/sentinel1-rtc-stac.git )
#!pip install --no-deps -e ../stactools

In [4]:
# Filesystem handle for the bucket, opened anonymously (anon=True), so no AWS credentials are used
s3 = s3fs.S3FileSystem(anon=True)

In [5]:
# Get URIs for up to `n_items` tiles per year for each MGRS square
base_url = 's3://sentinel-s1-rtc-indigo/tiles/RTC/1/IW'
grid_squares = ['10UCU', '10UFV', '12SYJ', '13SBD', '17RMJ', '15RUN', '19TCH']
years = [2016, 2017, 2018, 2019, 2020, 2021]
n_items = 3

paths = []
for mgrs in grid_squares:
    # Split the MGRS id once per tile (it does not depend on the year),
    # e.g. '10UCU' -> '10' / 'U' / 'CU', which is how the bucket keys are laid out.
    utm_zone, latitude_band, grid_square = mgrs[:2], mgrs[2], mgrs[3:]
    for year in years:
        s3Path = f'{base_url}/{utm_zone}/{latitude_band}/{grid_square}/{year}'
        try:
            # Named `year_listing` (not `items`) so it is not confused with the
            # list of STAC items built later in the notebook.
            year_listing = s3.ls(s3Path)
        except FileNotFoundError:
            # Some tile/year combinations have no data (e.g. some tiles lack 2016).
            # Depending on the s3fs version, a missing prefix either raises or
            # returns an empty list; both cases end up contributing no paths.
            continue
        # Slicing an empty listing yields [], so no explicit length check is needed
        paths.extend(year_listing[:n_items])

In [6]:
# Number of tile paths collected (at most n_items per MGRS square per year)
len(paths)

108

In [7]:
#write these out to a text file
with open('paths.txt','w') as f:
    f.write('\n'.join(paths))

In [8]:
def s3_to_http(s3path, region='us-west-2'):
    """Convert a path in the sentinel-s1-rtc-indigo bucket to its HTTPS URL.

    Accepts both the bare ``bucket/key`` form and the ``s3://bucket/key``
    form. Only the leading bucket component is rewritten, so a key that
    happens to contain the bucket name further along is left intact.

    Parameters
    ----------
    s3path : str
        Path beginning with ``sentinel-s1-rtc-indigo`` (optionally prefixed
        with ``s3://``).
    region : str, optional
        AWS region used in the virtual-hosted-style endpoint.

    Returns
    -------
    str
        ``https://sentinel-s1-rtc-indigo.s3.<region>.amazonaws.com/<key>``,
        or ``s3path`` unchanged when it does not start with the bucket name.
    """
    bucket = 'sentinel-s1-rtc-indigo'
    endpoint = f'https://{bucket}.s3.{region}.amazonaws.com'
    scheme = 's3://'

    # Drop an explicit scheme so both 's3://bucket/key' and 'bucket/key' work
    key_path = s3path[len(scheme):] if s3path.startswith(scheme) else s3path

    if key_path == bucket:
        return endpoint
    # Require the '/' so a different bucket with the same leading text is not rewritten
    if key_path.startswith(bucket + '/'):
        return endpoint + key_path[len(bucket):]
    # Not a path in this bucket: hand it back untouched
    return s3path

In [None]:
# Test creating single item
#print(paths[0])
#s3_to_http(f'{paths[0]}')

#import rasterio
#with rasterio.open(s3_to_http(f'{paths[0]}')+'/local_incident_angle.tif') as src:
#    print(src.profile)

In [17]:
%%time
item = create_item(s3_to_http(f'{paths[0]}'))

CPU times: user 849 ms, sys: 120 ms, total: 969 ms
Wall time: 1.6 s


In [19]:
# View the JSON
#import json
#print(json.dumps(item.to_dict(), indent=1))

# Validate in-memory STAC
#item.validate()

# Validate on-disk STAC
# pystac.read_file(f'{dst}/{dst}.json').validate()

In [20]:
%%time 
# Generate all the items serially (could easily parallelize)
items = [create_item(s3_to_http(f'{i}')) for i in paths]

CPU times: user 1min 52s, sys: 16.6 s, total: 2min 9s
Wall time: 3min 50s


In [None]:
# Add items to a catalog
#catalog = Catalog(id='sentinel1-rtc',
#                  description='Sentinel1 radiometric terrain corrected backscatter (RTC)'
#                 )

#catalog.add_items(items)
#print(len(catalog)) #TypeError: object of type 'Catalog' has no len()
#catalog.describe()

# Save the catalog in current directory
#catalog.normalize_hrefs('./')
#catalog.validate()
#catalog.save(catalog_type=pystac.CatalogType.RELATIVE_PUBLISHED)

In [23]:
# Build the parent STAC Collection and attach every generated item to it
collection = create_collection()
collection.add_items(items)
# Print the collection tree to confirm the items were attached
collection.describe()

* <Collection id=sentinel1-rtc-aws>
  * <Item id=S1A_20170101_10UCU_ASC>
  * <Item id=S1A_20170125_10UCU_ASC>
  * <Item id=S1B_20170102_10UCU_ASC>
  * <Item id=S1B_20180102_10UCU_ASC>
  * <Item id=S1B_20180107_10UCU_ASC>
  * <Item id=S1B_20180109_10UCU_ASC>
  * <Item id=S1B_20190102_10UCU_ASC>
  * <Item id=S1B_20190104_10UCU_ASC>
  * <Item id=S1B_20190109_10UCU_ASC>
  * <Item id=S1B_20200109_10UCU_ASC>
  * <Item id=S1B_20200111_10UCU_ASC>
  * <Item id=S1B_20200121_10UCU_ASC>
  * <Item id=S1B_20210101_10UCU_DSC>
  * <Item id=S1B_20210105_10UCU_ASC>
  * <Item id=S1B_20210108_10UCU_DSC>
  * <Item id=S1B_20170102_10UFV_ASC>
  * <Item id=S1B_20170104_10UFV_ASC>
  * <Item id=S1B_20170121_10UFV_ASC>
  * <Item id=S1B_20180104_10UFV_ASC>
  * <Item id=S1B_20180109_10UFV_ASC>
  * <Item id=S1B_20180111_10UFV_ASC>
  * <Item id=S1B_20190104_10UFV_ASC>
  * <Item id=S1B_20190106_10UFV_ASC>
  * <Item id=S1B_20190111_10UFV_ASC>
  * <Item id=S1B_20200101_10UFV_ASC>
  * <Item id=S1B_20200106_10UFV_ASC>
  

In [24]:
# Reorganize the flat list of items into nested <MGRS>/<YEAR> subcatalogs.
# This only restructures the in-memory collection; saving happens in a later cell.
template = '${sentinel:mgrs}/${year}'
collection.generate_subcatalogs(template=template)
# Print the tree again to check the new nesting
collection.describe()

* <Collection id=sentinel1-rtc-aws>
    * <Catalog id=10UCU>
        * <Catalog id=2017>
          * <Item id=S1A_20170101_10UCU_ASC>
          * <Item id=S1A_20170125_10UCU_ASC>
          * <Item id=S1B_20170102_10UCU_ASC>
        * <Catalog id=2018>
          * <Item id=S1B_20180102_10UCU_ASC>
          * <Item id=S1B_20180107_10UCU_ASC>
          * <Item id=S1B_20180109_10UCU_ASC>
        * <Catalog id=2019>
          * <Item id=S1B_20190102_10UCU_ASC>
          * <Item id=S1B_20190104_10UCU_ASC>
          * <Item id=S1B_20190109_10UCU_ASC>
        * <Catalog id=2020>
          * <Item id=S1B_20200109_10UCU_ASC>
          * <Item id=S1B_20200111_10UCU_ASC>
          * <Item id=S1B_20200121_10UCU_ASC>
        * <Catalog id=2021>
          * <Item id=S1B_20210101_10UCU_DSC>
          * <Item id=S1B_20210105_10UCU_ASC>
          * <Item id=S1B_20210108_10UCU_DSC>
    * <Catalog id=10UFV>
        * <Catalog id=2017>
          * <Item id=S1B_20170102_10UFV_ASC>
          * <Item id=S1B_2

In [26]:
# Root all hrefs at the current directory, then validate before saving
collection.normalize_hrefs('./')
# NOTE(review): presumably validate() checks only the collection object itself,
# not its child catalogs/items; confirm against the pystac docs (validate_all)
collection.validate()

In [27]:
# Save the collection using the RELATIVE_PUBLISHED catalog type
collection.save(catalog_type=pystac.CatalogType.RELATIVE_PUBLISHED)

# Modify existing catalog

In [None]:
#catalog = pystac.read_file('catalog.json')

In [None]:
#item = catalog.get_item('S1A_20170101_10UCU_ASC')
#item.properties

In [None]:
#template = '${sentinel:utm_zone}${sentinel:latitude_band}${sentinel:grid_square}/${year}'
#strategy = TemplateLayoutStrategy(item_template=template)
#catalog.normalize_hrefs('./test', strategy=strategy)
#catalog.save(catalog_type=CatalogType.SELF_CONTAINED)